
PHP 没有内置的函数可以在 UTF-8 字符串与 Unicode 码点(如 &#20320; 这样的数字字符引用)之间互相转换,我们只好自己写一个。
/**
 * Convert a UTF-8 string into numeric character references.
 *
 * Characters in the byte range 0x20-0x7F (printable ASCII plus DEL) are
 * copied through unchanged. Every other character -- ASCII control
 * characters and anything outside ASCII -- is replaced by $h_str, the
 * decimal Unicode code point, and a trailing ';' (for example "&#20320;").
 *
 * @param string $word_str UTF-8 encoded input text.
 * @param string $h_str    Prefix written before each code point; defaults to '&#'.
 * @return string The input with the characters described above replaced.
 */
function utf8_to_unicode($word_str,$h_str='&#') {
    $unicode_word_str = '';
    $word_str_len = mb_strlen($word_str,'utf-8');
    for($i=0;$i<$word_str_len;$i++){
        $word = mb_substr($word_str,$i,1,'utf-8');

        // A single byte in 0x20-0x7F needs no escaping.
        if(strlen($word) === 1 && ord($word) >= 0x20 && ord($word) <= 0x7F){
            $unicode_word_str .= $word;
            continue;
        }

        // Decode through UCS-4 so that 1-, 2-, 3- and 4-byte UTF-8 sequences
        // all yield the right code point. Slicing the concatenated bit string
        // with a fixed pattern only fits the 3-byte layout.
        $ucs4 = mb_convert_encoding($word,'UCS-4BE','UTF-8');
        if(!is_string($ucs4) || strlen($ucs4) !== 4){
            // Could not be decoded as exactly one code point: keep it as is.
            $unicode_word_str .= $word;
            continue;
        }

        $code_point = unpack('N',$ucs4);
        // For hexadecimal output (e.g. 4f60) use dechex($code_point[1]) here.
        $unicode_word_str .= $h_str.$code_point[1].';';
    }
    return $unicode_word_str;
}
// -----------------------------------
/**
 * Decode Unicode escape notations embedded in a string into UTF-8.
 *
 * The input is first passed through rawurldecode(). After that, each of the
 * following forms is replaced by the character it names:
 *   - %uXXXX   four hex digits; a high surrogate immediately followed by a
 *              low surrogate is combined into one character
 *   - &#xH..;  hexadecimal character reference
 *   - &#D..;   decimal character reference
 * All other text, including line breaks, is returned unchanged. An escape
 * that names a lone surrogate or a value above U+10FFFF is left as written.
 *
 * @param string $str Text that may contain the escape forms listed above.
 * @return string The decoded UTF-8 text.
 */
function unicode_to_utf8($str) {
    $str = rawurldecode($str);

    // Groups: 1+2 = surrogate pair, 3 = single %u unit, 4 = hex reference,
    // 5 = decimal reference. Leading zeros sit outside the groups so the
    // captured digits always fit in an integer.
    $pattern = '/%u([dD][89abAB][0-9a-fA-F]{2})%u([dD][c-fC-F][0-9a-fA-F]{2})'
        . '|%u([0-9a-fA-F]{4})'
        . '|&#[xX]0*([0-9a-fA-F]{1,6});'
        . '|&#0*(\d{1,7});/';

    $decoded = preg_replace_callback($pattern, function ($m) {
        // Trailing unmatched groups are absent from $m, so test from the
        // highest group downwards.
        if ($m[1] !== '') {
            $cp = 0x10000 + ((hexdec($m[1]) - 0xD800) << 10) + (hexdec($m[2]) - 0xDC00);
        } elseif (isset($m[5])) {
            $cp = (int) $m[5];
        } elseif (isset($m[4])) {
            $cp = hexdec($m[4]);
        } else {
            $cp = hexdec($m[3]);
        }

        // Not a Unicode scalar value: keep the escape text untouched.
        if ($cp > 0x10FFFF || ($cp >= 0xD800 && $cp <= 0xDFFF)) {
            return $m[0];
        }

        // UCS-4 (not UCS-2) so that code points above U+FFFF survive.
        $utf8 = iconv('UCS-4BE', 'UTF-8', pack('N', $cp));
        return $utf8 === false ? $m[0] : $utf8;
    }, $str);

    // preg_replace_callback() yields null on a PCRE failure.
    return $decoded === null ? $str : $decoded;
}