PHP 没有内置这样的函数,我们只好自己写一个。
/**
 * Convert a UTF-8 string into decimal numeric character references.
 *
 * Characters in the range U+0020..U+007F are copied through unchanged.
 * Every other code point (ASCII control characters and everything above
 * U+007F) is written as $h_str . <decimal code point> . ';', for example
 * "&#20320;" for U+4F60.
 *
 * The code point is obtained by decoding the text to UCS-4, so 2-, 3- and
 * 4-byte UTF-8 sequences are all handled.
 *
 * @param string $word_str UTF-8 encoded input text.
 * @param string $h_str    Prefix placed in front of each decimal code point.
 *
 * @return string The input with the affected characters replaced by references.
 */
function utf8_to_unicode($word_str, $h_str = '&#')
{
    $word_str = (string) $word_str;
    if ($word_str === '') {
        return '';
    }

    // Decode the whole string once into fixed-width big-endian 32-bit units;
    // unpack('N*') then yields one integer code point per character.
    $code_points = unpack('N*', mb_convert_encoding($word_str, 'UCS-4BE', 'UTF-8'));

    $unicode_word_str = '';
    foreach ($code_points as $code_point) {
        if ($code_point < 0x20 || $code_point > 0x7F) {
            $unicode_word_str .= $h_str . $code_point . ';';
        } else {
            // Inside U+0020..U+007F the code point equals the single byte value.
            $unicode_word_str .= chr($code_point);
        }
    }

    return $unicode_word_str;
}
-----------------------------------
/**
 * Decode escaped Unicode code points in a string back into UTF-8.
 *
 * The input is first passed through rawurldecode(). After that, these escape
 * forms are replaced by the UTF-8 encoding of the code point they name:
 *   - %uXXXX   four hex digits; a high surrogate immediately followed by a
 *              low surrogate (%uD83D%uDE00) is combined into one code point
 *   - &#xH..;  hexadecimal numeric character reference
 *   - &#D..;   decimal numeric character reference
 *
 * All other text, including line breaks, is returned unchanged. An escape
 * that names a lone surrogate or a value above U+10FFFF is left as written.
 *
 * @param string $str Text containing the escapes listed above.
 *
 * @return string The decoded UTF-8 text.
 */
function unicode_to_utf8($str)
{
    $str = rawurldecode((string) $str);

    // Groups: 1+2 = surrogate pair, 3 = single %u unit, 4 = hex reference,
    // 5 = decimal reference. The pair alternative must come first so that a
    // high surrogate is not consumed on its own.
    $pattern = '/%u([Dd][89ABab][0-9A-Fa-f]{2})%u([Dd][C-Fc-f][0-9A-Fa-f]{2})'
        . '|%u([0-9A-Fa-f]{4})'
        . '|&#[xX]([0-9A-Fa-f]+);'
        . '|&#([0-9]+);/';

    $decoded = preg_replace_callback($pattern, function ($m) {
        if ($m[1] !== '') {
            $code_point = 0x10000
                + ((hexdec($m[1]) - 0xD800) << 10)
                + (hexdec($m[2]) - 0xDC00);
        } elseif (isset($m[3]) && $m[3] !== '') {
            $code_point = hexdec($m[3]);
        } elseif (isset($m[4]) && $m[4] !== '') {
            $code_point = hexdec($m[4]);
        } else {
            $code_point = (int) $m[5];
        }

        // Values that are not valid Unicode scalar values cannot be encoded
        // as UTF-8; keep the escape text instead of dropping it.
        if ($code_point > 0x10FFFF || ($code_point >= 0xD800 && $code_point <= 0xDFFF)) {
            return $m[0];
        }

        $utf8 = iconv('UCS-4BE', 'UTF-8', pack('N', (int) $code_point));

        return $utf8 === false ? $m[0] : $utf8;
    }, $str);

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // URL-decoded text rather than returning nothing.
    return $decoded === null ? $str : $decoded;
}