download this file: class.mb.php view text/plain: class.mb.php file encoding: UTF-8 [goback]
<?php
##
## this file name is 'class.mb.php'
##
## multi-byte string parsing(korean)
##
## [author]
## - Chilbong Kim, <san2(at)linuxchannel.net>
## - http://linuxchannel.net/
##
## [changes]
## - 2004.12.24 : patch _htmlwordwrap_callback()
## - 2003.08.26 : add mb::strlen()
## - 2003.08.24 : renew mb::htmlwordwrap()
## - 2003.08.23 : new build
##
## [references]
## - http://www.php.net/manual/en/function.strpos.php
## - http://www.php.net/manual/en/function.strrpos.php
## - http://www.maisan.com/EDU/ip/basic/ksc5601.htm
## - http://item-hunter.com/~chang/src/lecture/MEMO/z00015.html
## - http://camars.kaist.ac.kr/~dtkim/java/unicode_hangul.html
## - http://www.w3c.or.kr/i18n/hangul-i18n/ko-code.html
## - http://www.jounlai.com/web/prg/kor/
## - http://www.jounlai.com/web/prg/kor/handiv.html
## - http://www.jounlai.com/web/prg/kor/hancom.html
## - http://trade.chonbuk.ac.kr/~leesl/code/
## - http://doc.ddart.net/hangul/hangulcode.html
##
## [usage]
##
## [example]
##
##
## class mb
##
## Byte-oriented helpers for cutting and wrapping strings in which every
## character with the high bit set occupies exactly two bytes (the scheme the
## parity arithmetic in mb::substr() and the division by two in mb::strlen()
## rely on). Variable-width encodings such as UTF-8 are NOT handled correctly.
##
## All methods are static so that both documented call styles keep working:
##   mb::wordwrap($s, 40);        // static call
##   $mb = new mb; $mb->strlen($s); // call through an instance
##
## The methods keep their historical return-by-reference signature, so every
## return statement hands back a variable (never an expression) to avoid
## "only variable references should be returned by reference" notices.
##
class mb
{
    var $classname = 'mb';

    ## mb::htmlwordwrap(), html style
    ##
    ## Wraps a multi-byte html style string to a given number of characters
    ## using a string break character.
    ##
    ## Every whitespace-free run of 14 or more bytes is handed to
    ## mb::_htmlwordwrap_callback(), which force-cuts over-long runs with
    ## $break; afterwards the whole string is soft-wrapped on whitespace with
    ## a literal "\n" by the native wordwrap().
    ##
    ## usage : string htmlwordwrap ( string str [, int width [, string break [, boolean cut]]])
    ##       : 'cut' is accepted for signature compatibility but never read
    ##
    ## Side effect: stores width and break in
    ## $GLOBALS['_SAFE_HTMLWORDWRAP_WIDTH'] / $GLOBALS['_SAFE_HTMLWORDWRAP_BREAK']
    ## so that the callback can read them.
    ##
    public static function &htmlwordwrap($str, $width=76, $break="\n", $cut=TRUE)
    {
        ## nothing to wrap when the whole string is shorter than one line
        if(strlen($str) < $width) return $str;

        $GLOBALS['_SAFE_HTMLWORDWRAP_WIDTH'] = $width = (int) $width;
        $GLOBALS['_SAFE_HTMLWORDWRAP_BREAK'] = $break = (string) $break;

        $wrapped = preg_replace_callback('/([^\s]{14,})/',
            array(__CLASS__,'_htmlwordwrap_callback'),$str);

        ## preg_replace_callback() yields NULL on a PCRE error; keep the input then
        if($wrapped !== null) $str = $wrapped;

        $str = wordwrap($str,$width,"\n");

        return $str;
    }

    ## mb::_htmlwordwrap_callback()
    ##
    ## preg_replace_callback() handler used by mb::htmlwordwrap().
    ## $matches[0] and $matches[1] both hold one whitespace-free token.
    ## Returns the token unchanged when it fits, when it fits after
    ## strip_tags(), or when it looks like an HREF=/SRC= attribute; otherwise
    ## returns the token force-cut with the configured break string.
    ##
    public static function &_htmlwordwrap_callback($matches)
    {
        ## fall back to the htmlwordwrap() defaults when called without setup
        $width = isset($GLOBALS['_SAFE_HTMLWORDWRAP_WIDTH'])
            ? (int) $GLOBALS['_SAFE_HTMLWORDWRAP_WIDTH'] : 76;
        $break = isset($GLOBALS['_SAFE_HTMLWORDWRAP_BREAK'])
            ? (string) $GLOBALS['_SAFE_HTMLWORDWRAP_BREAK'] : "\n";

        $token = $matches[0];

        if(strlen($token) <= $width) return $token;
        if(strlen(strip_tags($token)) <= $width) return $token;
        if(preg_match(';^(HREF|SRC)*=[\'"]*;i',$matches[1])) return $token;

        ## fixed, for search string:
        ## a token of the form <tag>...</tag> gets a reduced line width,
        ## derived from where its last '></' sequence ends
        if(preg_match(';<([^<>]+)>.+</\1>;i',$matches[1]))
        {
            $lpos = self::strlpos($matches[1],'></',10) + 10;
            $width = ($lpos > $width) ? (int)($width/2) : ($width - $lpos);
        }

        ## a width below 1 would make the forced cut loop forever
        ## (mb::wordwrap) or raise an error (native wordwrap)
        if($width < 1) $width = 1;

        ## tokens holding high-bit bytes must be cut on 2-byte boundaries
        if(preg_match('/[\x80-\xFE]+/',$matches[1]))
        { $substr = self::wordwrap($matches[1],$width,$break,1); }
        else
        { $substr = wordwrap($matches[1],$width,$break,1); }

        ## return whole string, patch 2005.01.27
        $token = str_replace($matches[1],$substr,$token);

        ## a "\n" that landed between '<' and '/' of a closing tag is moved
        ## in front of the closing '>' so the tag stays recognisable
        $fixed = preg_replace(";<\n/([^>]+)>;","</\\1\n>",$token);
        if($fixed !== null) $token = $fixed;

        return $token;
    }

    ## mb::strcutpos()
    ##
    ## Find position of cutting of a multi-byte string: the byte length of
    ## mb::substr($str, 0, $length).
    ##
    ## usage : int strcutpos ( string str, int length )
    ##
    public static function &strcutpos($str, $length)
    {
        $pos = (int) strlen(self::substr($str,0,$length));
        return $pos;
    }

    ## mb::substr()
    ##
    ## Cuts $str after at most $length bytes without splitting a 2-byte
    ## character, strips a dangling '&' or '&#123' entity fragment from the
    ## end, and appends $suffix.
    ##
    ## usage : string substr ( string str, int start [, int length [, string suffix]])
    ##
    ## - When $str is not longer than $length it is returned as is: $start is
    ##   not applied and no $suffix is appended.
    ## - $length less than or equal to 0 is counted from the end of the string.
    ## - The character boundary is located by scanning bytes [0, $length) of
    ##   the whole string; $start only shifts the final native substr(), so
    ##   the cut is only boundary-safe for $start = 0.
    ##
    public static function &substr($str, $start, $length=0, $suffix='')
    {
        $str    = (string)$str;
        $start  = (int)$start;
        $length = (int)$length;
        $strlen = strlen($str);

        if($strlen <= $length) return $str;
        if($length <= 0) $length += $strlen;

        ## walk back over the run of high-bit bytes that ends at the cut;
        ## the lower bound keeps negative offsets from wrapping around to the
        ## end of the string
        $i = $length - 1;
        while($i >= 0 && (ord($str[$i]) & 0x80)) $i--;

        ## an odd run length means the cut falls inside a 2-byte character,
        ## so give up one byte
        $pos = $length - (($length + $i + 1) & 1);

        $cut = substr($str,$start,$pos);
        if($cut === false) $cut = ''; // older PHP returns FALSE for an empty slice
        $cut = preg_replace('/(&#[0-9]*|&)$/','',$cut);
        $cut .= $suffix;

        return $cut; // string
    }

    ## mb::strcut(), text style
    ##
    ## Shorthand for mb::substr() with start 0.
    ##
    ## usage : string strcut ( string str, int length [, string suffix])
    ##
    public static function &strcut($str, $length, $suffix='')
    {
        $cut = self::substr($str,0,$length,$suffix);
        return $cut;
    }

    ## mb::wordwrap(), text style
    ##
    ## Wraps a multi-byte text style string to a given number of characters
    ## using a string break character.
    ##
    ## usage : string wordwrap ( string str [, int width [, string break [, boolean cut]]])
    ##       : 'cut' is always TRUE (the argument is never read)
    ##
    ## Cut positions are taken at multiples of $width and pulled back by
    ## mb::strcutpos() to a safe boundary. $break is appended after EVERY
    ## chunk, the last one included (mb::chunk_split() depends on this).
    ## A $width below 1 cannot advance the cut position, so the input is
    ## returned unchanged in that case.
    ##
    public static function &wordwrap($str, $width=75, $break="\n", $cut=TRUE)
    {
        $str   = (string)$str;
        $width = (int)$width; // args check

        if($width < 1) return $str; // would never terminate otherwise

        $end = strlen($str) + $width; // end position
        $scp = 0;                     // start of cut position
        $new = '';

        for($i=$width; $i<$end; $i+=$width)
        {
            $pos = self::strcutpos($str,$i);
            $new .= substr($str,(int)$scp,(int)($pos-$scp)).$break;
            $scp = $pos;
        }

        return $new; // string
    }

    ## mb::chunk_split()
    ##
    ## alias of mb::wordwrap()
    ##
    ## usage : string chunk_split ( string body [, int chunklen [, string end]])
    ##
    public static function &chunk_split($body, $chunklen=76, $end="\r\n")
    {
        $chunked = self::wordwrap($body,$chunklen,$end,1);
        return $chunked;
    }

    ## mb::strlpos()
    ##
    ## Locates the last occurrence of $substr in $str and returns the byte
    ## offset just past its END (start offset + length of $substr).
    ## Returns 0 when $substr is empty or does not occur at or after $offset.
    ## A negative $offset is counted from the end of $str.
    ##
    ## usage : int strlpos ( string haystack, string needle [, int offset])
    ##
    ## http://www.php.net/manual/en/function.strpos.php
    ## http://www.php.net/manual/en/function.strrpos.php
    ##
    public static function &strlpos($str, $substr, $offset=0)
    {
        $str    = (string)$str;
        $substr = (string)$substr;
        $offset = (int)$offset;
        $result = 0;

        if($substr === '') return $result;

        if($offset < 0) $offset = strlen($str) + $offset;
        if($offset < 0) $offset = 0;

        ## the last occurrence is at or after $offset exactly when some
        ## occurrence is; the strict test keeps a match at offset 0 valid
        $last = strrpos($str,$substr);
        if($last === false || $last < $offset) return $result;

        $result = (int)($last + strlen($substr));
        return $result;
    }

    ## mb::mbchars()
    ##
    ## Return the number of bytes in the range 0x80-0xFE found in $str.
    ## The matched bytes themselves are handed back through $mbchars.
    ##
    ## usage : int mbchars( string str [, array &mbchars])
    ##
    public static function &mbchars($str, &$mbchars=array())
    {
        $found = array();
        $mblen = (int) preg_match_all('/[\x80-\xFE]/',(string)$str,$found);
        $mbchars = isset($found[0]) ? $found[0] : array();
        return $mblen;
    }

    ## mb::strlen()
    ##
    ## Return string length, multi-byte character is counted as 1
    ## (byte length minus half the number of high-bit bytes).
    ##
    ## usage : int strlen ( string str)
    ##
    public static function &strlen($str)
    {
        $len = (int)(strlen($str) - self::mbchars($str)/2);
        return $len;
    }
} // end of class
?>