php教程

超轻量级php框架startmvc

PHP切割汉字的常用方法实例总结

更新时间:2020-04-04 18:53:26 作者:startmvc
本文实例讲述了PHP切割汉字的常用方法。分享给大家供大家参考,具体如下:

本文实例讲述了PHP切割汉字的常用方法。分享给大家供大家参考,具体如下:


<?php
/*
 A UTF-8 encoded character occupies 1 to 4 bytes (most Chinese characters take 3).
*/
/*--------------------------方法一截取中文字符串方法------------------------------*/
/**
 * Cut a byte range out of a string that mixes 1-byte ASCII with 2-byte
 * (GBK/GB2312-style) characters, without splitting a 2-byte character.
 *
 * Every byte greater than 0xA0 is treated as the lead byte of a 2-byte
 * character. This function is therefore not suitable for UTF-8 input, where
 * Chinese characters take 3 bytes.
 *
 * @param string $str   Source string.
 * @param int    $start Byte offset at which the result starts.
 * @param int    $len   Number of bytes wanted; the result may be one byte
 *                      longer when the last character is a 2-byte one.
 * @return string The selected characters, or "" when the range is empty.
 */
function msubstr($str, $start, $len)
{
    $tmpstr = "";
    // Never read past the end of the input.
    $end = min($start + $len, strlen($str));

    // The scan always begins at offset 0: only by walking from the start can
    // a byte be classified as lead or trail. Bytes before $start are skipped
    // instead of collected (the previous code ignored $start altogether).
    for ($i = 0; $i < $end; $i++) {
        $width = ord($str[$i]) > 0xa0 ? 2 : 1;
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        // Step over the trail byte of a 2-byte character.
        $i += $width - 1;
    }

    return $tmpstr;
}
/*----------------------------第二种方法-----------------------------------*/
//截取的是UTF-8字符串
/**
 * Return the leading part of a UTF-8 string that fits into a display width.
 *
 * An ASCII byte counts as width 1 and any multi-byte character as width 2.
 * A multi-byte character that would not fit completely is left out.
 *
 * @param string $str UTF-8 encoded source string.
 * @param int    $len Maximum display width of the result.
 * @return string The prefix of $str that fits into $len.
 */
function utf_substr($str, $len)
{
    $str = (string) $str;
    $new_str = [];

    for ($i = 0; $i < $len; $i++) {
        // Nothing left to consume.
        if ((string) $str === '') {
            break;
        }

        // Test the byte value itself. The previous code evaluated
        // ord($tem_str > 127), i.e. ord() of a boolean, which classified
        // ASCII letters as multi-byte characters.
        $lead = ord($str[0]);

        if ($lead > 127) {
            // A multi-byte character uses two width units.
            $i++;
            if ($i < $len) {
                // Sequence length is derived from the lead byte.
                if ($lead >= 0xF0) {
                    $bytes = 4;
                } elseif ($lead >= 0xE0) {
                    $bytes = 3;
                } elseif ($lead >= 0xC0) {
                    $bytes = 2;
                } else {
                    // Stray continuation byte: consume it alone.
                    $bytes = 1;
                }
                $new_str[] = substr($str, 0, $bytes);
                $str = substr($str, $bytes);
            }
        } else {
            $new_str[] = substr($str, 0, 1);
            $str = substr($str, 1);
        }
    }

    return implode('', $new_str);
}
/*-------------------------------------第三种方法(UTF-8)--------------------------------*/
/**
 * Shorten a UTF-8 string to roughly $length display columns and mark the cut
 * with "...".
 *
 * ASCII characters count as 1 column, every multi-byte character as 2.
 * Characters are collected until the running width exceeds $length - 3
 * (three columns are reserved for the ellipsis). "..." is appended only when
 * characters were actually removed.
 *
 * @param string $string UTF-8 encoded source string.
 * @param int    $length Target display width including the ellipsis.
 * @return string The shortened string, or the matched characters unchanged
 *                when the limit is never exceeded.
 */
function cutstr($string, $length)
{
    // Split into whole UTF-8 characters. The 2-byte alternative must read
    // [\xc2-\xdf][\x80-\xbf]; with a "|" between the two classes every 2-byte
    // character was split into two separate bytes and could be cut in half.
    preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);

    $chars = $info[0];
    $total = count($chars);
    $wordscut = "";
    $j = 0;

    for ($i = 0; $i < $total; $i++) {
        $wordscut .= $chars[$i];
        $j += ord($chars[$i]) > 127 ? 2 : 1;
        if ($j > $length - 3) {
            // If the limit is hit on the very last character nothing was
            // dropped, so no ellipsis is added.
            return $i < $total - 1 ? $wordscut . "..." : $wordscut;
        }
    }

    return implode('', $chars);
}
// Demo: shorten a mixed digit/Chinese sample to about 10 display columns and print it.
$string = '312哈哈,这个组合很难切割哦';
print cutstr($string, 10);
/*---------------------------------下面是曾经用过的截取第三个的字符串的------------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
// $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
// '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
// '$1',$name1);
// }
/*--------------------------------------------第四种方法(UTF-8)---------------------------------------------*/
/**
 * Cut a UTF-8 string after $cutlength "full-width" units and append "..."
 * when part of the string was left out.
 *
 * Width rules: a multi-byte character counts as 1 unit, an upper-case ASCII
 * letter as 1 unit, every other single byte (lower-case letters, digits,
 * half-width punctuation) as 0.5 unit.
 *
 * @param string    $sourcestr UTF-8 encoded source string.
 * @param int|float $cutlength Maximum width in full-width units.
 * @return string The shortened string, with "..." only if something was cut.
 */
function cut_str($sourcestr, $cutlength)
{
    $sourcestr = (string) $sourcestr;
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // accumulated width
    $str_length = strlen($sourcestr);

    // Strictly "<": there is no byte at offset $str_length.
    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord($sourcestr[$i]);

        if ($ascnum >= 240) {
            // 4-byte sequence; previously sliced as 3 bytes, which split it.
            $bytes = 4;
            $width = 1;
        } elseif ($ascnum >= 224) {
            $bytes = 3;
            $width = 1;
        } elseif ($ascnum >= 192) {
            $bytes = 2;
            $width = 1;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Upper-case letters are visually wide: count as a full unit.
            $bytes = 1;
            $width = 1;
        } else {
            $bytes = 1;
            $width = 0.5;
        }

        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
        $n += $width;
    }

    // Add the ellipsis only if input remains. The old test compared the byte
    // length with a character count and flagged strings that fit completely.
    if ($i < $str_length) {
        $returnstr .= "...";
    }

    return $returnstr;
}
/*--------------------第五种方法(UTF-8)---------------------------------------------*/
/**
 * Cut a UTF-8 string that may contain HTML entities, never splitting a
 * multi-byte character or an entity.
 *
 * With $magic enabled, $len is measured in "wide" units. Multi-byte
 * characters, numeric entities (&#NNN;) and upper-case ASCII letters count
 * as one unit; other ASCII characters and the named entities &lt; &gt; &amp;
 * &quot; count as half a unit. A multi-byte character or numeric entity that
 * overshoots the limit by half a unit is dropped again. With $magic
 * disabled, $len is a plain character count in which an entity is one
 * character.
 *
 * @param string     $title Source string (UTF-8, optionally with entities).
 * @param int        $start Byte offset to start from; moved back to the lead
 *                          byte when it points into a multi-byte character.
 * @param int|string $len   Length limit; "" (the default) means the byte
 *                          length of $title.
 * @param bool       $magic Width-based counting when true, character
 *                          counting when false.
 * @return string The selected part of $title.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    $total = strlen($title);
    if ($len == "") {
        $len = $total;
    }

    // If $start points at a continuation byte (10xxxxxx), walk back to the
    // lead byte of that character.
    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) {
                    break;
                }
            }
            $start = $i;
        }
    }

    // The byte length is already within the limit: no counting needed.
    if ($total <= $len) {
        return substr($title, $start, $len);
    }

    // Named entities that are treated as one narrow character.
    $named = ["&lt;", "&gt;", "&amp;", "&quot;"];

    $alen = 0;    // narrow (half-unit) characters seen
    $blen = 0;    // wide (full-unit) characters seen
    $realnum = 0; // characters seen, regardless of width
    $length = 0;  // bytes to return

    for ($i = $start; $i < $total; $i++) {
        $ctype = 0; // 1 = character is dropped again when it overshoots
        $cstep = 0; // bytes of the current character (0 = stray continuation byte)
        $wide = false;
        $cur = substr($title, $i, 1);
        $code = ord($cur);

        if ($cur === "&") {
            // A bare ampersand is an ordinary one-byte character.
            $cstep = 1;
            $isEntity = false;
            foreach ($named as $entity) {
                if (substr($title, $i, strlen($entity)) === $entity) {
                    $cstep = strlen($entity);
                    $isEntity = true;
                    break;
                }
            }
            // Anchored with ^ so that only an entity starting at $i matches,
            // not one further along the string.
            if (!$isEntity && preg_match('/^&#\d+;?/', substr($title, $i), $match)) {
                $cstep = strlen($match[0]);
                $wide = true;
                $ctype = 1;
            }
        } elseif ($code >= 192) {
            // Lead byte of a multi-byte sequence; its value gives the length.
            if ($code >= 252) {
                $cstep = 6;
            } elseif ($code >= 248) {
                $cstep = 5;
            } elseif ($code >= 240) {
                $cstep = 4;
            } elseif ($code >= 224) {
                $cstep = 3;
            } else {
                $cstep = 2;
            }
            $wide = true;
            $ctype = 1;
        } elseif ($code < 128) {
            $cstep = 1;
            // Upper-case letters count as wide but are kept on overshoot.
            $wide = ($code >= 65 && $code <= 90);
        }

        if ($cstep > 0) {
            $length += $cstep;
            $i += $cstep - 1; // the loop header adds the final +1
            $realnum++;
            if ($magic) {
                if ($wide) {
                    $blen++;
                } else {
                    $alen++;
                }
            }
        } else {
            // Stray continuation byte: keep it, but do not count a character.
            $length += 1;
        }

        if ($magic) {
            $used = $blen * 2 + $alen;
            if ($used == $len * 2) {
                break;
            }
            if ($used == $len * 2 + 1) {
                // Overshot by half a unit: drop the last wide character.
                if ($ctype == 1) {
                    $length -= $cstep;
                }
                break;
            }
        } elseif ($realnum == $len) {
            break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Character-based substring for UTF-8 text, implemented with one regular
 * expression: skip up to $from characters, then keep up to $len characters.
 *
 * A "character" is one ASCII byte, or a lead byte (0xC0-0xFF) followed by
 * its continuation bytes (0x80-0xBF).
 *
 * NOTE(review): PCRE caps a {min,max} quantifier at 65535, so larger $from
 * or $len values are expected to make the pattern fail - confirm before
 * using this on very long strings.
 *
 * @param string $str  UTF-8 encoded source string.
 * @param int    $from Number of leading characters to skip (negative values
 *                     are treated as 0).
 * @param int    $len  Maximum number of characters to return (negative
 *                     values are treated as 0).
 * @return string|null The selected characters; null if preg_replace() fails.
 */
function utf8Substr($str, $from, $len)
{
    // Both bounds are spliced into the pattern, so force them to be
    // non-negative integers. Anything else would yield a malformed
    // quantifier such as {0,-1} or let the caller alter the pattern.
    $from = max(0, (int) $from);
    $len = max(0, (int) $len);

    $char = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';
    $pattern = '#^' . $char . '{0,' . $from . '}'
        . '(' . $char . '{0,' . $len . '}).*#s';

    return preg_replace($pattern, '$1', $str);
}
// Demo: keep the first 15 characters of a mixed Chinese/ASCII title and print them.
$title = '你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd';
$title = utf8Substr($title, 0, 15);
print $title;
?>

PHP 切割汉字