00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00136 class t3lib_cs {
/**
 * Byte value written in place of a character that has no representation in
 * the target charset (63 is the ASCII code of "?").
 */
var $noCharByteVal = 63;

/**
 * Conversion tables loaded by initCharset(), indexed by charset name. Every
 * entry has a 'local' part (charset code => UTF-8 sequence) and a 'utf8'
 * part (UTF-8 sequence => charset code).
 */
var $parsedCharsets = array();

/**
 * Case conversion tables indexed by charset name, then by 'toUpper',
 * 'toLower' or 'toTitle'.
 */
var $caseFolding = array();

/**
 * Tables mapping characters to an ASCII replacement, indexed by charset name.
 */
var $toASCII = array();

/**
 * Charsets that are handled as exactly two bytes per character.
 */
var $twoByteSets = array(
    'ucs-2' => 1,
);

/**
 * Charsets that are handled as exactly four bytes per character.
 */
var $fourByteSets = array(
    'ucs-4'  => 1,
    'utf-32' => 1,
);

/**
 * Multi-byte charsets in which a byte above 127 starts a two-byte character
 * (utf8_encode() treats the Shift_JIS bytes 0xA0-0xDF as single-byte
 * exceptions).
 */
var $eucBasedSets = array(
    'gb2312'    => 1,
    'big5'      => 1,
    'euc-kr'    => 1,
    'shift_jis' => 1,
);
00166
00167
00168
/**
 * Alias => canonical charset name. All keys are lowercase because
 * parse_charset() lowercases its input before looking it up here.
 * (The 'utf8' alias used to be listed twice; the duplicate is removed.)
 */
var $synonyms = array(
    'us'              => 'ascii',
    'us-ascii'        => 'ascii',
    'cp819'           => 'iso-8859-1',
    'ibm819'          => 'iso-8859-1',
    'iso-ir-100'      => 'iso-8859-1',
    'iso-ir-109'      => 'iso-8859-2',
    'iso-ir-148'      => 'iso-8859-9',
    'iso-ir-199'      => 'iso-8859-14',
    'iso-ir-203'      => 'iso-8859-15',
    'csisolatin1'     => 'iso-8859-1',
    'csisolatin2'     => 'iso-8859-2',
    'csisolatin3'     => 'iso-8859-3',
    'csisolatin5'     => 'iso-8859-9',
    'csisolatin8'     => 'iso-8859-14',
    'csisolatin9'     => 'iso-8859-15',
    'csisolatingreek' => 'iso-8859-7',
    'iso-celtic'      => 'iso-8859-14',
    'latin1'          => 'iso-8859-1',
    'latin2'          => 'iso-8859-2',
    'latin3'          => 'iso-8859-3',
    'latin5'          => 'iso-8859-9',
    'latin6'          => 'iso-8859-10',
    'latin8'          => 'iso-8859-14',
    'latin9'          => 'iso-8859-15',
    'l1'              => 'iso-8859-1',
    'l2'              => 'iso-8859-2',
    'l3'              => 'iso-8859-3',
    'l5'              => 'iso-8859-9',
    'l6'              => 'iso-8859-10',
    'l8'              => 'iso-8859-14',
    'l9'              => 'iso-8859-15',
    'cyrillic'        => 'iso-8859-5',
    'arabic'          => 'iso-8859-6',
    'tis-620'         => 'iso-8859-11',
    'win874'          => 'windows-874',
    'win1250'         => 'windows-1250',
    'win1251'         => 'windows-1251',
    'win1252'         => 'windows-1252',
    'win1253'         => 'windows-1253',
    'win1254'         => 'windows-1254',
    'win1255'         => 'windows-1255',
    'win1256'         => 'windows-1256',
    'win1257'         => 'windows-1257',
    'win1258'         => 'windows-1258',
    'cp1250'          => 'windows-1250',
    'cp1251'          => 'windows-1251',
    'cp1252'          => 'windows-1252',
    'ms-ee'           => 'windows-1250',
    'ms-ansi'         => 'windows-1252',
    'ms-greek'        => 'windows-1253',
    'ms-turk'         => 'windows-1254',
    'winbaltrim'      => 'windows-1257',
    'koi-8ru'         => 'koi-8r',
    'koi8r'           => 'koi-8r',
    'cp878'           => 'koi-8r',
    'mac'             => 'macroman',
    'macintosh'       => 'macroman',
    'euc-cn'          => 'gb2312',
    'x-euc-cn'        => 'gb2312',
    'euccn'           => 'gb2312',
    'cp936'           => 'gb2312',
    'big-5'           => 'big5',
    'cp950'           => 'big5',
    'eucjp'           => 'euc-jp',
    'sjis'            => 'shift_jis',
    'shift-jis'       => 'shift_jis',
    'cp932'           => 'shift_jis',
    'cp949'           => 'euc-kr',
    'utf7'            => 'utf-7',
    'utf8'            => 'utf-8',
    'utf16'           => 'utf-16',
    'utf32'           => 'utf-32',
    'ucs2'            => 'ucs-2',
    'ucs4'            => 'ucs-4',
);
00246
00247
/**
 * Language identifier => script family. get_locale_charset() looks up the
 * (lowercased) language part of a locale here and then maps the script to a
 * charset through $script_to_charset_unix / $script_to_charset_windows.
 *
 * Fixes compared to the previous table:
 *  - 'isl' pointed to the non-existent script 'west_euorpean'
 *  - 'swedish' and 'thai' were only reachable through the misspelt keys
 *    'svedish' and 'that' (both kept for backward compatibility)
 */
var $lang_to_script = array(
        // Two-letter language codes
    'ar' => 'arabic',
    'bg' => 'cyrillic',
    'bs' => 'east_european',
    'cs' => 'east_european',
    'da' => 'west_european',
    'de' => 'west_european',
    'es' => 'west_european',
    'et' => 'estonian',
    'eo' => 'unicode',
    'eu' => 'west_european',
    'fa' => 'arabic',
    'fi' => 'west_european',
    'fo' => 'west_european',
    'fr' => 'west_european',
    'gr' => 'greek',
    'he' => 'hebrew',
    'hi' => 'unicode',
    'hr' => 'east_european',
    'hu' => 'east_european',
    'iw' => 'hebrew',
    'is' => 'west_european',
    'it' => 'west_european',
    'ja' => 'japanese',
    'kl' => 'west_european',
    'ko' => 'korean',
    'lt' => 'lithuanian',
    'lv' => 'west_european',
    'nl' => 'west_european',
    'no' => 'west_european',
    'pl' => 'east_european',
    'pt' => 'west_european',
    'ro' => 'east_european',
    'ru' => 'cyrillic',
    'sk' => 'east_european',
    'sl' => 'east_european',
    'sr' => 'cyrillic',
    'sv' => 'west_european',
    'th' => 'thai',
    'uk' => 'cyrillic',
    'vi' => 'vietnamese',
    'zh' => 'chinese',

        // Three-letter codes (these look like MS Windows language
        // abbreviations - TODO confirm)
    'ara' => 'arabic',
    'bgr' => 'cyrillic',
    'cat' => 'west_european',
    'chs' => 'simpl_chinese',
    'cht' => 'trad_chinese',
    'csy' => 'east_european',
    'dan' => 'west_european',
    'deu' => 'west_european',
    'dea' => 'west_european',
    'des' => 'west_european',
    'ena' => 'west_european',
    'enc' => 'west_european',
    'eng' => 'west_european',
    'enz' => 'west_european',
    'enu' => 'west_european',
    'euq' => 'west_european',
    'fos' => 'west_european',
    'far' => 'arabic',
    'fin' => 'west_european',
    'fra' => 'west_european',
    'frb' => 'west_european',
    'frc' => 'west_european',
    'frs' => 'west_european',
    'ell' => 'greek',
    'heb' => 'hebrew',
    'hin' => 'unicode',
    'hun' => 'east_european',
    'isl' => 'west_european',
    'ita' => 'west_european',
    'its' => 'west_european',
    'jpn' => 'japanese',
    'kor' => 'korean',
    'lth' => 'lithuanian',
    'lvi' => 'west_european',
    'msl' => 'west_european',
    'nlb' => 'west_european',
    'nld' => 'west_european',
    'nor' => 'west_european',
    'non' => 'west_european',
    'plk' => 'east_european',
    'ptg' => 'west_european',
    'ptb' => 'west_european',
    'rom' => 'east_european',
    'rus' => 'cyrillic',
    'slv' => 'east_european',
    'sky' => 'east_european',
    'srl' => 'east_european',
    'srb' => 'cyrillic',
    'esp' => 'west_european',
    'esm' => 'west_european',
    'esn' => 'west_european',
    'sve' => 'west_european',
    'tha' => 'thai',
    'trk' => 'turkish',
    'ukr' => 'cyrillic',

        // English language names
    'arabic'      => 'arabic',
    'basque'      => 'west_european',
    'bosnian'     => 'east_european',
        // NOTE(review): 'bg' and 'bgr' map to 'cyrillic' while 'bulgarian'
        // maps to 'east_european' - verify which one is intended.
    'bulgarian'   => 'east_european',
    'catalan'     => 'west_european',
    'croatian'    => 'east_european',
    'czech'       => 'east_european',
    'danish'      => 'west_european',
    'dutch'       => 'west_european',
    'english'     => 'west_european',
    'esperanto'   => 'unicode',
    'estonian'    => 'estonian',
    'faroese'     => 'west_european',
    'farsi'       => 'arabic',
    'finnish'     => 'west_european',
    'french'      => 'west_european',
    'galician'    => 'west_european',
    'german'      => 'west_european',
    'greek'       => 'greek',
    'greenlandic' => 'west_european',
    'hebrew'      => 'hebrew',
    'hindi'       => 'unicode',
    'hungarian'   => 'east_european',
    'icelandic'   => 'west_european',
    'italian'     => 'west_european',
    'latvian'     => 'west_european',
    'lettish'     => 'west_european',
    'lithuanian'  => 'lithuanian',
    'malay'       => 'west_european',
    'norwegian'   => 'west_european',
    'persian'     => 'arabic',
    'polish'      => 'east_european',
    'portuguese'  => 'west_european',
    'russian'     => 'cyrillic',
    'romanian'    => 'east_european',
    'serbian'     => 'cyrillic',
    'slovak'      => 'east_european',
    'slovenian'   => 'east_european',
    'spanish'     => 'west_european',
    'swedish'     => 'west_european',
    'svedish'     => 'west_european',    // legacy misspelling of 'swedish'
    'thai'        => 'thai',
    'that'        => 'thai',             // legacy misspelling of 'thai'
    'turkish'     => 'turkish',
    'ukrainian'   => 'cyrillic',
);
00396
00397
/**
 * Script family => charset that get_locale_charset() returns when TYPO3_OS
 * is not 'WIN'. An empty value (or a missing script) makes the caller fall
 * back to 'iso-8859-1'.
 */
var $script_to_charset_unix = array(
    'west_european' => 'iso-8859-1',
    'estonian'      => 'iso-8859-1',
    'east_european' => 'iso-8859-2',
    'baltic'        => 'iso-8859-4',
    'cyrillic'      => 'iso-8859-5',
    'arabic'        => 'iso-8859-6',
    'greek'         => 'iso-8859-7',
    'hebrew'        => 'iso-8859-8',
    'turkish'       => 'iso-8859-9',
    'thai'          => 'iso-8859-11',
    'lithuanian'    => 'iso-8859-13',
    'chinese'       => 'gb2312',
    'japanese'      => 'euc-jp',
    'korean'        => 'euc-kr',
    'simpl_chinese' => 'gb2312',
    'trad_chinese'  => 'big5',
    'vietnamese'    => '',
    'unicode'       => 'utf-8',
);
00418
00419
/**
 * Script family => charset that get_locale_charset() returns when TYPO3_OS
 * is 'WIN'. A missing script makes the caller use its Windows fallback.
 *
 * NOTE(review): 'cp874' and 'cp949' are returned as-is (the caller does not
 * pass them through parse_charset()); 'cp874' has no entry in $synonyms -
 * verify that a matching conversion table exists.
 */
var $script_to_charset_windows = array(
    'east_european' => 'windows-1250',
    'cyrillic'      => 'windows-1251',
    'west_european' => 'windows-1252',
    'greek'         => 'windows-1253',
    'turkish'       => 'windows-1254',
    'hebrew'        => 'windows-1255',
    'arabic'        => 'windows-1256',
    'baltic'        => 'windows-1257',
    'estonian'      => 'windows-1257',
    'lithuanian'    => 'windows-1257',
    'vietnamese'    => 'windows-1258',
    'thai'          => 'cp874',
    'korean'        => 'cp949',
    'chinese'       => 'gb2312',
    'japanese'      => 'shift_jis',
    'simpl_chinese' => 'gb2312',
    'trad_chinese'  => 'big5',
);
00439
00440
/**
 * Complete locale name => charset, checked by get_locale_charset() before
 * the locale is split into its parts.
 *
 * get_locale_charset() lowercases the locale before the lookup, so keys must
 * be lowercase to be reachable. 'sr@latn' is therefore added next to the
 * mixed-case 'sr@Latn', which is kept only for code that reads this array
 * directly.
 */
var $locale_to_charset = array(
    'japanese.euc' => 'euc-jp',
    'ja_jp.ujis'   => 'euc-jp',
    'korean.euc'   => 'euc-kr',
    'sr@latn'      => 'iso-8859-2',
    'sr@Latn'      => 'iso-8859-2',
    'zh_cn'        => 'gb2312',
    'zh_hk'        => 'big5',
    'zh_tw'        => 'big5',
);
00450
00451
00452
/**
 * Two-letter key => charset name; an empty string means that no specific
 * charset is listed for that key.
 * NOTE(review): the keys are presumably TYPO3 system language keys - this
 * array is not read by any method visible in this part of the file.
 */
var $charSetArray = array(
    'dk' => '',
    'de' => '',
    'no' => '',
    'it' => '',
    'fr' => '',
    'es' => '',
    'nl' => '',
    'cz' => 'windows-1250',
    'pl' => 'iso-8859-2',
    'si' => 'windows-1250',
    'fi' => '',
    'tr' => 'iso-8859-9',
    'se' => '',
    'pt' => '',
    'ru' => 'windows-1251',
    'ro' => 'iso-8859-2',
    'ch' => 'gb2312',
    'sk' => 'windows-1250',
    'lt' => 'windows-1257',
    'is' => 'utf-8',
    'hr' => 'windows-1250',
    'hu' => 'iso-8859-2',
    'gl' => '',
    'th' => 'iso-8859-11',
    'gr' => 'iso-8859-7',
    'hk' => 'big5',
    'eu' => '',
    'bg' => 'windows-1251',
    'br' => '',
    'et' => 'iso-8859-4',
    'ar' => 'iso-8859-6',
    'he' => 'utf-8',
    'ua' => 'windows-1251',
    'jp' => 'shift_jis',
    'lv' => 'utf-8',
    'vn' => 'utf-8',
    'ca' => 'iso-8859-15',
    'ba' => 'iso-8859-2',
    'kr' => 'euc-kr',
    'eo' => 'utf-8',
    'my' => '',
    'hi' => 'utf-8',
    'fo' => 'utf-8',
    'fa' => 'utf-8',
    'sr' => 'utf-8'
);
00500
00501
00502
/**
 * Two-letter key => language/locale code for those keys where both differ.
 * NOTE(review): presumably maps TYPO3 system language keys to ISO codes -
 * this array is not read by any method visible in this part of the file.
 */
var $isoArray = array(
    'ba' => 'bs',
    'br' => 'pt_BR',
    'ch' => 'zh_CN',
    'cz' => 'cs',
    'dk' => 'da',
    'si' => 'sl',
    'se' => 'sv',
    'gl' => 'kl',
    'gr' => 'el',
    'hk' => 'zh_HK',
    'kr' => 'ko',
    'ua' => 'uk',
    'jp' => 'ja',
    'vn' => 'vi',
);
00519
/**
 * Normalizes a charset name: the name is lowercased and, when it is a known
 * alias, replaced by the canonical name from $this->synonyms.
 *
 * @param string $charset Charset name in any letter case
 * @return string Lowercased (canonical, if an alias was found) charset name
 */
function parse_charset($charset) {
    $normalized = strtolower($charset);

    return isset($this->synonyms[$normalized])
        ? $this->synonyms[$normalized]
        : $normalized;
}
00533
/**
 * Determines the charset that belongs to a locale string of the form
 * language[_country][.charset][@modifier].
 *
 * Resolution order:
 *  1. the complete locale is listed in $this->locale_to_charset
 *  2. the locale carries an explicit ".charset" part
 *  3. the modifier is "euro" (=> iso-8859-15)
 *  4. the language is mapped to a script and the script to a charset,
 *     using the Windows table when TYPO3_OS is 'WIN', else the Unix table
 *  5. platform fallback: 'windows-1252' or 'iso-8859-1'
 *
 * @param string $locale Locale string, any letter case
 * @return string Charset name
 */
function get_locale_charset($locale) {
    $locale = strtolower($locale);

        // Exact locale matches. The locale was lowercased above, so the table
        // keys are compared lowercased as well; otherwise mixed-case keys
        // such as 'sr@Latn' could never match.
    foreach ($this->locale_to_charset as $knownLocale => $knownCharset) {
        if (strtolower($knownLocale) === $locale) {
            return $knownCharset;
        }
    }

        // Split off the modifier; array_pad() avoids reading a missing offset
        // when the locale has no "@" part.
    list($locale, $modifier) = array_pad(explode('@', $locale), 2, '');

        // An explicit charset part wins
    list($locale, $charset) = array_pad(explode('.', $locale), 2, '');
    if ($charset) {
        return $this->parse_charset($charset);
    }

        // The "euro" modifier implies Latin-9
    if ($modifier == 'euro') {
        return 'iso-8859-15';
    }

        // Language part => script ('' when the language is unknown)
    $localeParts = explode('_', $locale);
    $language = $localeParts[0];
    $script = isset($this->lang_to_script[$language]) ? $this->lang_to_script[$language] : '';

        // Script => charset, depending on the platform
    if (defined('TYPO3_OS') && TYPO3_OS == 'WIN') {
        $scriptToCharset = $this->script_to_charset_windows;
        $fallback = 'windows-1252';    // was misspelt 'window-1252'
    } else {
        $scriptToCharset = $this->script_to_charset_unix;
        $fallback = 'iso-8859-1';
    }

        // Empty table values (e.g. 'vietnamese' on Unix) use the fallback too
    return (isset($scriptToCharset[$script]) && $scriptToCharset[$script])
        ? $scriptToCharset[$script]
        : $fallback;
}
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586
00587
00588
/**
 * Converts a string from one charset to another.
 *
 * When the target is UTF-8, or when no entity fallback is requested, the
 * PHP extension selected in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] ('mbstring',
 * 'iconv' or 'recode') is tried first. If it is not configured or reports
 * failure, the conversion goes through UTF-8 with this class's own tables.
 *
 * @param string $str Input string
 * @param string $fromCS Source charset (lowercase, canonical name)
 * @param string $toCS Target charset (lowercase, canonical name)
 * @param boolean $useEntityForNoChar If set, characters missing in the target
 *        charset become numeric entities instead of the "no char" byte
 * @return string Converted string
 */
function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) {
    if ($fromCS == $toCS) {
        return $str;
    }

        // The PHP extensions cannot produce entities for unknown characters,
        // so they are only usable in these two situations.
    if ($toCS == 'utf-8' || !$useEntityForNoChar) {
        $convMethod = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'])
            ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']
            : '';

        switch ($convMethod) {
            case 'mbstring':
                $conv_str = mb_convert_encoding($str,$toCS,$fromCS);
                if (false !== $conv_str) return $conv_str;    // returns false for unsupported charsets
                break;

            case 'iconv':
                    // This call was truncated after "$toCS.'" (a syntax
                    // error); restored with the //TRANSLIT suffix so that
                    // iconv approximates characters missing in the target.
                $conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str);
                if (false !== $conv_str) return $conv_str;
                break;

            case 'recode':
                $conv_str = recode_string($fromCS.'..'.$toCS,$str);
                if (false !== $conv_str) return $conv_str;
                break;
        }
            // Fall through to the table-based conversion below
    }

    if ($fromCS != 'utf-8') $str = $this->utf8_encode($str,$fromCS);
    if ($toCS != 'utf-8') $str = $this->utf8_decode($str,$toCS,$useEntityForNoChar);
    return $str;
}
00627
/**
 * Converts every string in an array - including nested arrays - from one
 * charset to another. The array is modified in place; keys are untouched.
 *
 * @param array $array Array to convert (passed by reference)
 * @param string $fromCS Source charset
 * @param string $toCS Target charset
 * @param boolean $useEntityForNoChar Passed on to conv()
 * @return void
 */
function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) {
    foreach ($array as $key => $unused) {
            // Always work on $array[$key] so that the caller's array changes
        if (!is_array($array[$key])) {
            $array[$key] = $this->conv($array[$key],$fromCS,$toCS,$useEntityForNoChar);
            continue;
        }

        $this->convArray($array[$key],$fromCS,$toCS,$useEntityForNoChar);
    }
}
00648
/**
 * Converts a string from the given charset to UTF-8, using the table loaded
 * by initCharset().
 *
 * Bytes up to 127 are copied unchanged, except for charsets listed in
 * $this->twoByteSets where every two bytes form one big-endian code unit.
 * Codes without a table entry are replaced by the "no char" byte.
 *
 * @param string $str Input string
 * @param string $charset Charset of $str (lowercase, canonical name)
 * @return string UTF-8 string; nothing (NULL) is returned when the
 *         conversion table for $charset cannot be loaded
 */
function utf8_encode($str,$charset) {
    if ($charset === 'utf-8') {
        return $str;
    }

    if ($this->initCharset($charset)) {
        $strLen = strlen($str);
        $outStr = '';
        $noChar = chr($this->noCharByteVal);

        for ($a = 0; $a < $strLen; $a++) {
            $chr = substr($str,$a,1);
            $ord = ord($chr);

            if (isset($this->twoByteSets[$charset])) {
                    // Second byte of the unit; 0 if the string ends
                    // mid-unit, instead of reading past the end.
                $ord2 = ($a + 1 < $strLen) ? ord(substr($str,$a+1,1)) : 0;
                    // Combine both bytes, high byte first. This used "&",
                    // which always yields 0 because the low eight bits of
                    // ($ord << 8) are empty.
                $ord = ($ord << 8) | $ord2;

                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    $outStr .= $noChar;
                }
                $a++;    // the second byte has been consumed
            } elseif ($ord > 127) {
                if (isset($this->eucBasedSets[$charset])) {
                        // Lead byte of a two-byte character. In Shift_JIS
                        // the bytes 0xA0-0xDF are complete characters.
                    if ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) {
                        $a++;
                        $ord2 = ord(substr($str,$a,1));
                        $ord = $ord * 256 + $ord2;
                    }
                }

                if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
                    $outStr .= $this->parsedCharsets[$charset]['local'][$ord];
                } else {
                    $outStr .= $noChar;
                }
            } else {
                $outStr .= $chr;    // plain ASCII
            }
        }
        return $outStr;
    }
}
00693
/**
 * Converts a UTF-8 string to the given charset, using the table loaded by
 * initCharset().
 *
 * ASCII bytes are copied. A multi-byte sequence is looked up in the 'utf8'
 * part of the table; codes above 255 are written as two bytes (high byte
 * first). Sequences without a table entry become a numeric entity when
 * $useEntityForNoChar is set, otherwise the "no char" byte. A stray
 * continuation byte also becomes the "no char" byte.
 *
 * @param string $str UTF-8 input string
 * @param string $charset Target charset (lowercase, canonical name)
 * @param boolean $useEntityForNoChar Write "&#x...;" for unknown characters
 * @return string Converted string; nothing (NULL) is returned when the
 *         conversion table for $charset cannot be loaded
 */
function utf8_decode($str,$charset,$useEntityForNoChar=0) {
    if (!$this->initCharset($charset)) {
        return;
    }

    $length = strlen($str);
    $result = '';

    for ($pos = 0; $pos < $length; $pos++) {
        $byte = substr($str,$pos,1);
        $code = ord($byte);

        if ($code <= 127) {
            $result .= $byte;
            continue;
        }
        if (!($code & 64)) {
                // 10xxxxxx: continuation byte without a lead byte
            $result .= chr($this->noCharByteVal);
            continue;
        }

            // Lead byte: every further 1-bit after the first one announces
            // one continuation byte.
        $sequence = $byte;
        for ($n = 0; $n < 8; $n++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $pos++;
            $sequence .= substr($str,$pos,1);
        }

        if (isset($this->parsedCharsets[$charset]['utf8'][$sequence])) {
            $local = $this->parsedCharsets[$charset]['utf8'][$sequence];
            $result .= ($local > 255)
                ? chr(($local >> 8) & 255).chr($local & 255)
                : chr($local);
        } elseif ($useEntityForNoChar) {
            $result .= '&#'.$this->utf8CharToUnumber($sequence,1).';';
        } else {
            $result .= chr($this->noCharByteVal);
        }
    }

    return $result;
}
00738
/**
 * Replaces every multi-byte UTF-8 character by a hexadecimal numeric entity
 * ("&#x...;"). ASCII bytes are copied; a stray continuation byte becomes the
 * "no char" byte.
 *
 * @param string $str UTF-8 input string
 * @return string String containing only ASCII characters and entities
 */
function utf8_to_entities($str) {
    $length = strlen($str);
    $result = '';
    $pos = 0;

    while ($pos < $length) {
        $byte = substr($str,$pos,1);
        $code = ord($byte);

        if ($code <= 127) {
            $result .= $byte;
        } elseif (!($code & 64)) {
                // 10xxxxxx: continuation byte without a lead byte
            $result .= chr($this->noCharByteVal);
        } else {
                // Collect the lead byte and its continuation bytes
            $sequence = $byte;
            for ($n = 0; $n < 8; $n++) {
                $code = $code << 1;
                if (!($code & 128)) {
                    break;
                }
                $pos++;
                $sequence .= substr($str,$pos,1);
            }
            $result .= '&#'.$this->utf8CharToUnumber($sequence,1).';';
        }
        $pos++;
    }

    return $result;
}
00770
/**
 * Converts numeric entities ("&#1234;", "&#x1b;") - and optionally the named
 * HTML entities - in a string to UTF-8 characters. Entities that are not
 * recognised are left untouched.
 *
 * Changes compared to the previous implementation:
 *  - preg_split() replaces ereg_replace() (removed in PHP 7) and the random
 *    md5 token, which could in theory collide with the input
 *  - the named-entity table is requested in ISO-8859-1 where PHP allows it;
 *    newer PHP versions return UTF-8 by default, which the subsequent
 *    utf8_encode(..., 'iso-8859-1') would encode a second time
 *  - the hex marker is accepted in upper case ("&#X41;") as well
 *  - unknown named entities no longer read an undefined array index
 *
 * @param string $str Input string (UTF-8 / ASCII with entities)
 * @param boolean $alsoStdHtmlEnt If set, named entities such as "&amp;" or
 *        "&eacute;" are converted too
 * @return string UTF-8 string
 */
function entities_to_utf8($str,$alsoStdHtmlEnt=0) {
    $trans_tbl = array();
    if ($alsoStdHtmlEnt) {
            // The third argument exists since PHP 5.3.4; older versions
            // return an ISO-8859-1 table anyway.
        if (version_compare(PHP_VERSION, '5.3.4', '>=')) {
            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1'));
        } else {
            $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));
        }
    }

        // Even entries are literal text, odd entries are the entity bodies
        // (the text between "&" and ";").
    $parts = preg_split('/&([#[:alnum:]]*);/', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $k => $v) {
        if ($k % 2) {
            if (substr($v,0,1) == '#') {
                if (strtolower(substr($v,1,1)) == 'x') {
                        // Hexadecimal character reference
                    $parts[$k] = $this->UnumberToChar(hexdec(substr($v,2)));
                } else {
                        // Decimal character reference
                    $parts[$k] = $this->UnumberToChar(substr($v,1));
                }
            } elseif ($alsoStdHtmlEnt && isset($trans_tbl['&'.$v.';'])) {
                    // Named entity: the table holds ISO-8859-1 characters
                $parts[$k] = $this->utf8_encode($trans_tbl['&'.$v.';'],'iso-8859-1');
            } else {
                    // Unknown entity: restore it unchanged
                $parts[$k] = '&'.$v.';';
            }
        }
    }

    return implode('',$parts);
}
00803
/**
 * Splits a UTF-8 string into an array with one entry per character. Each
 * entry is either the character's Unicode number or, with $retChar, the
 * character itself. A stray continuation byte yields the "no char" value.
 *
 * @param string $str UTF-8 input string
 * @param boolean $convEntities If set, entities (numeric and named) are
 *        converted to UTF-8 characters first
 * @param boolean $retChar If set, entries are UTF-8 characters, not numbers
 * @return array One entry per character of the input
 */
function utf8_to_numberarray($str,$convEntities=0,$retChar=0) {
    if ($convEntities) {
        $str = $this->entities_to_utf8($str,1);
    }

    $length = strlen($str);
    $items = array();

    for ($pos = 0; $pos < $length; $pos++) {
        $byte = substr($str,$pos,1);
        $code = ord($byte);

        if ($code <= 127) {
            $items[] = $retChar ? chr($code) : $code;
            continue;
        }
        if (!($code & 64)) {
                // 10xxxxxx: continuation byte without a lead byte
            $items[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal;
            continue;
        }

            // Collect the lead byte and its continuation bytes
        $sequence = $byte;
        for ($n = 0; $n < 8; $n++) {
            $code = $code << 1;
            if (!($code & 128)) {
                break;
            }
            $pos++;
            $sequence .= substr($str,$pos,1);
        }

        $items[] = $retChar ? $sequence : $this->utf8CharToUnumber($sequence);
    }

    return $items;
}
00842
/**
 * Encodes a Unicode number as a UTF-8 byte sequence of one to six bytes.
 * Numbers of 0x80000000 and above yield the "no char" byte.
 *
 * @param integer $cbyte Unicode number
 * @return string UTF-8 byte sequence
 */
function UnumberToChar($cbyte) {
    if ($cbyte < 0x80) {
        return chr($cbyte);
    }

        // Per encoded length: exclusive upper bound of the number range,
        // marker bits of the lead byte, number of continuation bytes.
    $layouts = array(
        array(0x800,      0xC0, 1),
        array(0x10000,    0xE0, 2),
        array(0x200000,   0xF0, 3),
        array(0x4000000,  0xF8, 4),
        array(0x80000000, 0xFC, 5),
    );

    foreach ($layouts as $layout) {
        list($limit, $leadMarker, $tailBytes) = $layout;
        if ($cbyte < $limit) {
                // The lead byte carries the topmost bits ...
            $sequence = chr($leadMarker | ($cbyte >> (6 * $tailBytes)));
                // ... every continuation byte carries six more bits
            for ($shift = 6 * ($tailBytes - 1); $shift >= 0; $shift -= 6) {
                $sequence .= chr(0x80 | (($cbyte >> $shift) & 0x3F));
            }
            return $sequence;
        }
    }

    return chr($this->noCharByteVal);
}
00897
/**
 * Decodes a single UTF-8 character to its Unicode number.
 *
 * The payload bits of the lead byte and of each continuation byte are
 * collected as a string of binary digits and converted at the end. If the
 * first byte is not a lead byte (top two bits set), its byte value is used.
 *
 * @param string $str One UTF-8 character (first byte plus continuation bytes)
 * @param boolean $hex If set, the number is returned as "x" followed by
 *        lowercase hex digits
 * @return mixed Unicode number (integer), or hex string if $hex is set
 */
function utf8CharToUnumber($str,$hex=0) {
    $lead = ord(substr($str,0,1));
    $int = $lead;

    if (($lead & 192) == 192) {
        $shifted = $lead;
        $tailBits = '';
        $b = 0;
        while ($b < 8) {
            $shifted = $shifted << 1;
            if (!($shifted & 128)) {
                break;
            }
                // Lower six bits of continuation byte number $b + 1
            $tailBits .= substr('00000000'.decbin(ord(substr($str,$b+1,1))),-6);
            $b++;
        }
            // The lead byte contributes its lowest (6 - $b) bits
        $leadBits = substr('00000000'.decbin($lead),-(6-$b));

        $int = bindec($leadBits.$tailBits);
    }

    return $hex ? 'x'.dechex($int) : $int;
}
00925
00926
00927
00928
00929
00930
00931
00932
00933
00934
00935
00936
00937
00938
00939
/**
 * Loads the conversion table for a charset into
 * $this->parsedCharsets[$charset] unless it is already there.
 *
 * The table comes from the serialized cache file
 * typo3temp/cs/charset_<charset>.tbl if that exists; otherwise
 * PATH_t3lib/csconvtbl/<charset>.tbl is parsed and the result is written to
 * the cache. Only codes above 127 are stored.
 *
 * Two source formats are recognised (decided on the first data line):
 *  - 'whitespaced': "0x<code> <whitespace> 0x<unicode> <whitespace> ..."
 *  - 'ms-token':    "<code>=U+<unicode>" or "<code>:U+<unicode>"
 *
 * The removed POSIX regex functions ereg()/split() are replaced by their
 * PCRE counterparts with unchanged patterns.
 *
 * @param string $charset Charset name (lowercase, canonical)
 * @return mixed 1 if the table was loaded already, 2 if it was loaded now,
 *         FALSE if no conversion table exists for the charset
 */
function initCharset($charset) {
        // Already in memory? isset() first: on first use the key is missing
    if (isset($this->parsedCharsets[$charset]) && is_array($this->parsedCharsets[$charset])) {
        return 1;
    }

        // Source conversion table:
    $charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl';
    if (!($charset && t3lib_div::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile))) {
        return false;
    }

        // Cached, already parsed version:
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
            // NOTE(review): unserialize() trusts the cache file; make sure
            // typo3temp/ is writable by the application only.
        $this->parsedCharsets[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
        return 2;
    }

        // Parse the source table:
    $lines = t3lib_div::trimExplode(chr(10),t3lib_div::getUrl($charsetConvTableFile),1);
    $this->parsedCharsets[$charset] = array('local'=>array(),'utf8'=>array());

    $whitespacedPattern = '/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/';
    $detectedType = '';

    foreach ($lines as $value) {
            // Skip empty lines and comments
        if (trim($value) && substr($value,0,1) != '#') {

                // The format is detected once, on the first data line
            if (!$detectedType) {
                $detectedType = preg_match($whitespacedPattern,$value) ? 'whitespaced' : 'ms-token';
            }

            $hexbyte = '';
            $utf8 = '';
            if ($detectedType == 'ms-token') {
                $tokens = preg_split('/=|:/',$value,3);
                $hexbyte = $tokens[0];
                $utf8 = isset($tokens[1]) ? $tokens[1] : '';
            } elseif ($detectedType == 'whitespaced') {
                $regA = array();
                if (preg_match($whitespacedPattern,$value,$regA)) {
                    $hexbyte = $regA[1];
                    $utf8 = 'U+'.$regA[2];
                }
                    // A line that does not match keeps $hexbyte empty and is
                    // dropped by the ">127" test below.
            }

            $decval = hexdec(trim($hexbyte));
            if ($decval > 127) {
                    // substr(...,2) strips the "U+" prefix
                $utf8decval = hexdec(substr(trim($utf8),2));
                $utf8Char = $this->UnumberToChar($utf8decval);
                $this->parsedCharsets[$charset]['local'][$decval] = $utf8Char;
                $this->parsedCharsets[$charset]['utf8'][$utf8Char] = $decval;
            }
        }
    }

    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->parsedCharsets[$charset]));
    }

    return 2;
}
01002
/**
 * Builds the UTF-8 case-folding table ($this->caseFolding['utf-8']) and the
 * UTF-8 to-ASCII table ($this->toASCII['utf-8']).
 *
 * With $mode 'case' or 'ascii' the corresponding table is taken from memory
 * or from its cache file (typo3temp/cs/cscase_utf-8.tbl respectively
 * typo3temp/cs/csascii_utf-8.tbl) when available. Otherwise BOTH tables are
 * rebuilt from PATH_t3lib/unidata/UnicodeData.txt plus the optional files
 * SpecialCasing.txt and Translit.txt, and both cache files are written.
 * Only code points up to U+FFFF are processed.
 *
 * Changes compared to the previous implementation: the removed functions
 * ereg()/split() and the removed "{}" string offsets are replaced, the empty
 * read at the end of a file is no longer processed as a record, hex strings
 * are compared as strings (not loosely as numbers), and hexdec() no longer
 * receives the "U+" prefix.
 *
 * @param string $mode 'case', 'ascii' or NULL (always rebuild)
 * @return mixed 1 if the requested table was in memory, 2 if it came from
 *         the cache file, 3 if the tables were rebuilt, FALSE if
 *         UnicodeData.txt cannot be read
 */
function initUnicodeData($mode=null) {
        // Cache files
    $cacheFileCase = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_utf-8.tbl');
    $cacheFileASCII = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_utf-8.tbl');

        // Use memory or cache for the requested table if possible
    switch ($mode) {
        case 'case':
            if (isset($this->caseFolding['utf-8']) && is_array($this->caseFolding['utf-8'])) return 1;

            if ($cacheFileCase && @is_file($cacheFileCase)) {
                $this->caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase));
                return 2;
            }
            break;

        case 'ascii':
            if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) return 1;

            if ($cacheFileASCII && @is_file($cacheFileASCII)) {
                $this->toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII));
                return 2;
            }
            break;
    }

        // Main Unicode data file
    $unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt';
    if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) return false;

    $fh = fopen($unicodeDataFile,'rb');
    if (!$fh) return false;

        // Key = UTF-8 character, value = UTF-8 string
    $this->caseFolding['utf-8'] = array();
    $utf8CaseFolding =& $this->caseFolding['utf-8'];
    $utf8CaseFolding['toUpper'] = array();
    $utf8CaseFolding['toLower'] = array();
    $utf8CaseFolding['toTitle'] = array();

    $decomposition = array();    // "U+XXXX" => list of hex code values
    $mark = array();             // "U+XXXX" => 1 for characters of category M*
    $number = array();           // "U+XXXX" => numeric value for category N*
    $omit = array();             // "U+XXXX" => 1 for characters mapped to nothing

    while (!feof($fh)) {
        $line = fgets($fh,4096);
        if ($line === false) break;
        if (trim($line) == '') continue;

            // Field positions in UnicodeData.txt: 0 code, 1 name,
            // 2 category, 5 decomposition, 8 numeric value, 12 upper,
            // 13 lower, 14 title
        $fields = array_pad(explode(';', rtrim($line)), 15, '');
        $char = $fields[0];
        $name = $fields[1];
        $cat = $fields[2];
        $decomp = $fields[5];
        $num = $fields[8];
        $upper = $fields[12];
        $lower = $fields[13];
        $title = $fields[14];

        $ord = hexdec($char);
        if ($ord > 0xFFFF) break;    // the file is ordered; stop after U+FFFF

        $utf8_char = $this->UnumberToChar($ord);

        if ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper));
        if ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower));
            // Title case is stored only where it differs from upper case
        if ($title && $title !== $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title));

        switch (substr($cat,0,1)) {
            case 'M':    // mark
                $mark["U+$char"] = 1;
                break;

            case 'N':    // number with a numeric value
                if ($ord > 0x80 && $num != '') $number["U+$char"] = $num;
        }

            // Accented Latin letters without a decomposition are mapped to
            // the base letter named in their character name
        $match = array();
        if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/',$name,$match) && !$decomp) {
            $c = ord($match[2]);
            if ($match[1] == 'SMALL') $c += 32;

            $decomposition["U+$char"] = array(dechex($c));
            continue;
        }

        $match = array();
        if (preg_match('/(<.*>)? *(.+)/',$decomp,$match)) {
            switch ($match[1]) {
                case '<circle>':    // wrap in parentheses
                    $match[2] = '0028 '.$match[2].' 0029';
                    break;

                case '<square>':    // wrap in square brackets
                    $match[2] = '005B '.$match[2].' 005D';
                    break;

                case '<compat>':    // skip decompositions starting with a space
                    if (strpos($match[2],'0020 ') === 0) continue 2;
                    break;

                    // Presentation forms are not decomposed
                case '<initial>':
                case '<medial>':
                case '<final>':
                case '<isolated>':
                case '<vertical>':
                    continue 2;
            }
            $decomposition["U+$char"] = explode(' ',$match[2]);
        }
    }
    fclose($fh);

        // Additional casing rules (optional file); only unconditional rules
    $specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt';
    if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) {
        $fh = fopen($specialCasingFile,'rb');
        if ($fh) {
            while (!feof($fh)) {
                $line = fgets($fh,4096);
                if ($line === false) break;
                if (substr($line,0,1) == '#' || trim($line) == '') continue;

                list($char,$lower,$title,$upper,$cond) = array_pad(t3lib_div::trimExplode(';',$line), 5, '');
                if ($cond == '' || substr($cond,0,1) == '#') {
                    $utf8_char = $this->UnumberToChar(hexdec($char));
                    if ($char !== $lower) {
                        $arr = explode(' ',$lower);
                        for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                        $utf8CaseFolding['toLower'][$utf8_char] = implode('',$arr);
                    }
                    if ($char !== $title && $title !== $upper) {
                        $arr = explode(' ',$title);
                        for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                        $utf8CaseFolding['toTitle'][$utf8_char] = implode('',$arr);
                    }
                    if ($char !== $upper) {
                        $arr = explode(' ',$upper);
                        for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i]));
                        $utf8CaseFolding['toUpper'][$utf8_char] = implode('',$arr);
                    }
                }
            }
            fclose($fh);
        }
    }

        // Custom transliteration rules (optional file) override the above
    $customTranslitFile = PATH_t3lib.'unidata/Translit.txt';
    if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
        $fh = fopen($customTranslitFile,'rb');
        if ($fh) {
            while (!feof($fh)) {
                $line = fgets($fh,4096);
                if ($line === false) break;
                if (substr($line,0,1) == '#' || trim($line) == '') continue;

                list($char,$translit) = array_pad(t3lib_div::trimExplode(';',$line), 2, '');
                if (!$translit) $omit["U+$char"] = 1;    // character is dropped
                $decomposition["U+$char"] = explode(' ',$translit);
            }
            fclose($fh);
        }
    }

        // Resolve decompositions recursively and drop marks
    foreach ($decomposition as $from => $to) {
        $code_decomp = array();

        while ($code_value = array_shift($to)) {
            if (isset($decomposition["U+$code_value"])) {
                    // Replace the value by its own decomposition, in order
                foreach (array_reverse($decomposition["U+$code_value"]) as $cv) {
                    array_unshift($to,$cv);
                }
            } elseif (!isset($mark["U+$code_value"])) {
                array_push($code_decomp,$code_value);
            }
        }
        if (count($code_decomp) || isset($omit[$from])) {
            $decomposition[$from] = $code_decomp;
        } else {
            unset($decomposition[$from]);
        }
    }

        // Build the to-ASCII table; entries with non-ASCII results are skipped
    $this->toASCII['utf-8'] = array();
    $ascii =& $this->toASCII['utf-8'];

    foreach ($decomposition as $from => $to) {
        $code_decomp = array();
        while ($code_value = array_shift($to)) {
            $ord = hexdec($code_value);
            if ($ord > 127) {
                continue 2;
            }
            array_push($code_decomp,chr($ord));
        }
            // substr(...,2) strips the "U+" prefix of the key
        $ascii[$this->UnumberToChar(hexdec(substr($from,2)))] = join('',$code_decomp);
    }

        // Numeric values fill the gaps left by the decompositions
    foreach ($number as $from => $to) {
        $utf8_char = $this->UnumberToChar(hexdec(substr($from,2)));
        if (!isset($ascii[$utf8_char])) {
            $ascii[$utf8_char] = $to;
        }
    }

    if ($cacheFileCase) {
        t3lib_div::writeFileToTypo3tempDir($cacheFileCase,serialize($utf8CaseFolding));
    }

    if ($cacheFileASCII) {
        t3lib_div::writeFileToTypo3tempDir($cacheFileASCII,serialize($ascii));
    }

    return 3;
}
01228
/**
 * Makes sure the case-folding table for a charset is available in
 * $this->caseFolding[$charset].
 *
 * The table is taken from memory, then from the cache file
 * typo3temp/cs/cscase_<charset>.tbl, and otherwise derived from the UTF-8
 * case-folding table: every character of the charset's conversion table is
 * looked up there and the result converted back to the charset. Mappings
 * whose result is empty or not representable in the charset are left out.
 * The ASCII letters a-z / A-Z are always added.
 *
 * isset() checks replace reads of array keys that do not exist on first use
 * or for characters without a case mapping.
 *
 * @param string $charset Charset name (lowercase, canonical)
 * @return mixed 1 if the table was in memory, 2 if it came from the cache
 *         file, 3 if it was built, FALSE if the charset or the Unicode data
 *         cannot be initialised
 */
function initCaseFolding($charset) {
        // Already in memory?
    if (isset($this->caseFolding[$charset]) && is_array($this->caseFolding[$charset])) return 1;

        // Cached version?
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
        $this->caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
        return 2;
    }

        // Both the charset table and the UTF-8 case table are needed
    if (!$this->initCharset($charset)) {
        return false;
    }
    if (!$this->initUnicodeData('case')) {
        return false;
    }

    $nochar = chr($this->noCharByteVal);
    $this->caseFolding[$charset] = array();

    foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
            // The character in the charset's own encoding
        $c = $this->utf8_decode($utf8,$charset);

        foreach (array('toUpper','toLower','toTitle') as $direction) {
            if (!isset($this->caseFolding['utf-8'][$direction][$utf8])) {
                continue;
            }
            $cc = $this->utf8_decode($this->caseFolding['utf-8'][$direction][$utf8],$charset);
            if ($cc != '' && $cc != $nochar) {
                $this->caseFolding[$charset][$direction][$c] = $cc;
            }
        }
    }

        // ASCII letters (the conversion table holds codes above 127 only)
    for ($i=ord('a'); $i<=ord('z'); $i++) {
        $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32);
    }
    for ($i=ord('A'); $i<=ord('Z'); $i++) {
        $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i+32);
    }

    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->caseFolding[$charset]));
    }

    return 3;
}
01290
/**
 * Makes sure the to-ASCII table for a charset is available in
 * $this->toASCII[$charset].
 *
 * The table is taken from memory, then from the cache file
 * typo3temp/cs/csascii_<charset>.tbl, and otherwise derived from the UTF-8
 * to-ASCII table: every character of the charset's conversion table that has
 * an entry there is stored under its charset-encoded form.
 *
 * An isset() check replaces the read of an array key that does not exist on
 * first use; two unused local variables are removed.
 *
 * @param string $charset Charset name (lowercase, canonical)
 * @return mixed 1 if the table was in memory, 2 if it came from the cache
 *         file, 3 if it was built, FALSE if the charset or the Unicode data
 *         cannot be initialised
 */
function initToASCII($charset) {
        // Already in memory?
    if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) return 1;

        // Cached version?
    $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_'.$charset.'.tbl');
    if ($cacheFile && @is_file($cacheFile)) {
        $this->toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
        return 2;
    }

        // Both the charset table and the UTF-8 to-ASCII table are needed
    if (!$this->initCharset($charset)) {
        return false;
    }
    if (!$this->initUnicodeData('ascii')) {
        return false;
    }

    foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
        if (!isset($this->toASCII['utf-8'][$utf8])) {
            continue;
        }
            // Key: the character in the charset's own encoding
        $c = $this->utf8_decode($utf8,$charset);
        $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
    }

        // NOTE(review): if no character has a mapping, $this->toASCII[$charset]
        // stays unset here and serialize() reads a missing key - same as before.
    if ($cacheFile) {
        t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->toASCII[$charset]));
    }

    return 3;
}
01336
01337
01338
01339
01340
01341
01342
01343
01344
01345
01346
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
/**
 * Returns a part of a string, counted in characters of the given charset.
 *
 * Uses mbstring or iconv when selected in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'], otherwise the
 * class's own UTF-8 / EUC routines, fixed-width arithmetic for two- and
 * four-byte charsets, and plain substr() for everything else.
 *
 * Changes compared to the previous implementation:
 *  - for two- and four-byte charsets a NULL $len meant "length 0" (NULL * 2
 *    is 0) and returned an empty string; it now returns the rest of the string
 *  - "rest of the string" with mbstring/iconv no longer switches the global
 *    internal encoding (iconv_set_encoding() is deprecated); the character
 *    length of the string is passed as length instead
 *  - isset() is used for the charset lookup tables
 *
 * @param string $charset Charset of $string (lowercase, canonical name)
 * @param string $string Input string
 * @param integer $start Start position in characters (see PHP substr())
 * @param integer $len Length in characters; NULL means "to the end"
 * @return string The substring
 */
function substr($charset,$string,$start,$len=null) {
    if ($len === 0) return '';

    $utils = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'])
        ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']
        : '';

    if ($utils == 'mbstring') {
        if ($len == null) {
                // The full character length always reaches the end
            $len = mb_strlen($string,$charset);
        }
        return mb_substr($string,$start,$len,$charset);
    } elseif ($utils == 'iconv') {
        if ($len == null) {
            $len = iconv_strlen($string,$charset);
        }
        return iconv_substr($string,$start,$len,$charset);
    } elseif ($charset == 'utf-8') {
        return $this->utf8_substr($string,$start,$len);
    } elseif (isset($this->eucBasedSets[$charset])) {
        return $this->euc_substr($string,$start,$charset,$len);
    } elseif (isset($this->twoByteSets[$charset])) {
        return $len === null ? substr($string,$start*2) : substr($string,$start*2,$len*2);
    } elseif (isset($this->fourByteSets[$charset])) {
        return $len === null ? substr($string,$start*4) : substr($string,$start*4,$len*4);
    }

        // Treat everything else as a single-byte charset
    return $len === null ? substr($string,$start) : substr($string,$start,$len);
}
01412
/**
 * Counts the characters of a string in the given charset.
 *
 * Uses mbstring or iconv when selected in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'], otherwise the
 * class's own UTF-8 / EUC routines, byte length divided by the character
 * width for two- and four-byte charsets, and the byte length for everything
 * else. isset() is used for the lookup tables so that charsets which are not
 * listed there do not read a missing array key.
 *
 * @param string $charset Charset of $string (lowercase, canonical name)
 * @param string $string Input string
 * @return integer Number of characters
 */
function strlen($charset,$string) {
    $utils = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'])
        ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']
        : '';

    if ($utils == 'mbstring') {
        return mb_strlen($string,$charset);
    } elseif ($utils == 'iconv') {
        return iconv_strlen($string,$charset);
    } elseif ($charset == 'utf-8') {
        return $this->utf8_strlen($string);
    } elseif (isset($this->eucBasedSets[$charset])) {
        return $this->euc_strlen($string,$charset);
    } elseif (isset($this->twoByteSets[$charset])) {
        return strlen($string)/2;
    } elseif (isset($this->fourByteSets[$charset])) {
        return strlen($string)/4;
    }

        // Treat everything else as a single-byte charset
    return strlen($string);
}
01440
/**
 * Crops a string to a number of characters and attaches a marker where it was cut.
 * A positive $len keeps the first $len characters and appends $crop; a negative $len keeps the
 * last |$len| characters and prepends $crop. The string is returned unchanged when $len is 0 or
 * when the string is not longer than the requested number of characters.
 * UTF-8 and the EUC-based sets are measured in characters; every other charset is measured in bytes.
 *
 * @param	string		$charset The character set of $string
 * @param	string		$string The string to crop
 * @param	integer		$len Number of characters to keep (see above for the sign)
 * @param	string		$crop Marker that is attached at the cut position
 * @return	string		The cropped string, or $string unchanged if nothing had to be cut
 */
function crop($charset,$string,$len,$crop='') {
	if (intval($len) == 0) return $string;

	$byteLength = strlen($string);

		// Find the byte position of the cut
	if ($charset == 'utf-8') {
		$i = $this->utf8_char2byte_pos($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
		$i = $this->euc_char2byte_pos($string,$len,$charset);
	} elseif ($len > 0) {
		$i = intval($len);
	} else {
		$i = $byteLength+intval($len);
		if ($i<=0) $i = false;
	}

		// $len lies outside of the string, nothing to crop
	if ($i === false) return $string;

		// Explicit bounds checks replace the former probing of $string{$i} / $string{$i-1}
	if ($len > 0) {
		if ($i < $byteLength) {
			return substr($string,0,$i).$crop;
		}
	} elseif ($i >= 1 && $i <= $byteLength) {
		return $crop.substr($string,$i);
	}

	return $string;
}
01495
/**
 * Cuts a string to at most $len bytes without splitting a multi-byte character.
 * Uses mb_strcut() when $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] is 'mbstring';
 * otherwise dispatches to the charset-specific helpers of this class.
 *
 * @param	string		$charset The character set of $string
 * @param	string		$string The string to shorten
 * @param	integer		$len Maximum length in bytes; values <= 0 yield ''
 * @return	string		The shortened string
 */
function strtrunc($charset,$string,$len) {
	if ($len <= 0) return '';

	$utils = isset($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']) ? $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] : '';

	if ($utils == 'mbstring') {
		return mb_strcut($string,0,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_strtrunc($string,$len);
	} elseif (!empty($this->eucBasedSets[$charset])) {
			// euc_strtrunc() expects ($str,$len,$charset); the length must be passed along
		return $this->euc_strtrunc($string,$len,$charset);
	} elseif (!empty($this->twoByteSets[$charset])) {
			// Round down to a whole number of 2-byte units
		if ($len % 2) $len--;
	} elseif (!empty($this->fourByteSets[$charset])) {
			// Round down to a whole number of 4-byte units
		$len -= $len % 4;
	}

		// Single-byte charsets, and the fixed-width sets after rounding
	return substr($string,0,$len);
}
01524
/**
 * Converts the case of a string in the given charset.
 * With mbstring configured (and PHP >= 4.3) the mb_* functions do the work: 'toLower' lower-cases,
 * any other $case value upper-cases. Otherwise the charset-specific mapping helpers of this class
 * are called in 'case' mode with $case as the option.
 *
 * @param	string		$charset The character set of $string
 * @param	string		$string The string to convert
 * @param	string		$case Conversion to apply, e.g. 'toLower'
 * @return	string		The converted string
 */
function conv_case($charset,$string,$case) {
	$useMbstring = ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring' && (float)phpversion() >= 4.3);

	if ($useMbstring) {
		return ($case == 'toLower') ? mb_strtolower($string,$charset) : mb_strtoupper($string,$charset);
	}

	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'case',$case);
	}

	if (isset($this->eucBasedSets[$charset])) {
		return $this->euc_char_mapping($string,$charset,'case',$case);
	}

		// Everything else is treated as a single-byte encoding
	return $this->sb_char_mapping($string,$charset,'case',$case);
}
01558
/**
 * Runs a string through the 'ascii' mapping of the charset-specific helpers of this class.
 *
 * @param	string		$charset The character set of $string
 * @param	string		$string The string to convert
 * @return	string		The string as returned by the mapping helper
 */
function specCharsToASCII($charset,$string) {
	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'ascii');
	}

	if (isset($this->eucBasedSets[$charset])) {
		return $this->euc_char_mapping($string,$charset,'ascii');
	}

		// Everything else is treated as a single-byte encoding
	return $this->sb_char_mapping($string,$charset,'ascii');
}
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
/**
 * Maps every byte of a single-byte encoded string through a conversion table.
 * Bytes without an entry in the table are copied unchanged.
 *
 * @param	string		$str The string to convert
 * @param	string		$charset The character set of $str
 * @param	string		$mode 'case' (table from $this->caseFolding) or 'ascii' (table from $this->toASCII); any other value returns $str unchanged
 * @param	string		$opt For 'case' mode: key of the case folding table to use
 * @return	string		The converted string; $str unchanged if the table could not be initialised
 */
function sb_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;	// do nothing
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;	// do nothing
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	$out = '';
		// Iterate by length instead of probing $str{$i} until it comes back empty
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = $str[$i];
		$out .= isset($map[$c]) ? $map[$c] : $c;
	}

	return $out;
}
01634
01635
01636
01637
01638
01639
01640
01641
01642
01643
01644
01645
01646
01647
01648
01649
/**
 * Returns a portion of a UTF-8 string, with $start and $len counted in characters.
 * Positions are translated to byte offsets with utf8_char2byte_pos().
 *
 * @param	string		$str UTF-8 string
 * @param	integer		$start Start position (character position); may be negative
 * @param	integer		$len Length in characters; NULL means "up to the end of the string"
 * @return	mixed		The substring, or FALSE if a positive $start lies outside the string
 */
function utf8_substr($str,$start,$len=null) {
		// A length of 0 (also given as '0') always yields an empty string.
		// NULL is excluded first because passing NULL to string functions is deprecated.
	if ($len !== null && (string)$len === '0') return '';

	$byte_start = $this->utf8_char2byte_pos($str,$start);
	if ($byte_start === false) {
		if ($start > 0) {
			return false;	// $start outside string length
		}
			// Zero or negative start reaching to or beyond the first character: begin at byte 0
		$byte_start = 0;
	}

	$str = substr($str,$byte_start);

		// No length given: return everything from the start position
	if ($len == null) return $str;

	$byte_end = $this->utf8_char2byte_pos($str,$len);
	if ($byte_end === false) {
			// $len outside the remaining string: a negative length leaves nothing, a positive one everything
		return $len<0 ? '' : $str;
	}

	return substr($str,0,$byte_end);
}
01684
/**
 * Counts the characters of a UTF-8 string.
 * Every byte that is not a continuation byte (10xxxxxx) is counted, i.e. single-byte characters
 * (0xxxxxxx) and the start bytes of multi-byte sequences (11xxxxxx).
 *
 * @param	string		$str UTF-8 string
 * @return	integer		Number of characters
 */
function utf8_strlen($str) {
	$n = 0;
		// Iterate by length instead of probing $str{$i} until it comes back empty
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}
	return $n;
}
01705
/**
 * Cuts a UTF-8 string to at most $len bytes without splitting a multi-byte character.
 * If the byte at the cut position belongs to a multi-byte sequence that does not fit completely
 * into $len bytes, the whole sequence is dropped.
 *
 * @param	string		$str UTF-8 string
 * @param	integer		$len Maximum length in bytes; values <= 0 yield ''
 * @return	string		The shortened string ($str itself if it is not longer than $len bytes)
 */
function utf8_strtrunc($str,$len) {
	if ($len <= 0) return '';
	if (strlen($str) <= $len) return $str;	// nothing to cut, and no out-of-range reads below

	$i = $len-1;
	if (ord($str[$i]) & 0x80) {	// the last kept byte is part of a multi-byte sequence
			// Walk back over continuation bytes (10xxxxxx) to the start byte of the sequence
		while ($i > 0 && (ord($str[$i]) & 0xC0) == 0x80) {
			$i--;
		}

		$lead = ord($str[$i]);
		if (($lead & 0xC0) != 0xC0) {
				// Malformed input: no start byte in front of the continuation bytes.
				// Reaching index 0 on a continuation byte yields '' (sanity check), otherwise cut as requested.
			return ($lead & 0x80) ? '' : substr($str,0,$len);
		}

			// The number of leading 1-bits of the start byte is the length of the sequence
		for ($bc=0, $mbs=$lead; $mbs & 0x80; $mbs = $mbs << 1) {
			$bc++;
		}

			// The sequence would be split: cut in front of it.
			// A start byte at index 0 is valid, so a first character that fits is kept.
		if ($bc+$i > $len) return substr($str,0,$i);
	}

	return substr($str,0,$len);
}
01726
/**
 * Finds the character position of the first occurrence of $needle in a UTF-8 string.
 * Delegates to mb_strpos() or iconv_strpos() when $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']
 * selects one of them; otherwise searches on byte level and translates the positions.
 *
 * @param	string		$haystack UTF-8 string to search in
 * @param	string		$needle UTF-8 string to search for
 * @param	integer		$offset Character position to start the search at
 * @return	mixed		Character position of the match, or FALSE if there is none or $offset cannot be translated
 */
function utf8_strpos($haystack,$needle,$offset=0) {
	$backend = $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'];

	if ($backend == 'mbstring') {
		return mb_strpos($haystack,$needle,$offset,'utf-8');
	}
	if ($backend == 'iconv') {
		return iconv_strpos($haystack,$needle,$offset,'utf-8');
	}

		// Translate the character offset into a byte offset
	$startByte = $this->utf8_char2byte_pos($haystack,$offset);
	if ($startByte === false) {
		return false;	// offset beyond string length
	}

	$matchByte = strpos($haystack,$needle,$startByte);

		// Translate the byte position of the match back into a character position
	return ($matchByte === false) ? false : $this->utf8_byte2char_pos($haystack,$matchByte);
}
01752
/**
 * Finds the character position of the last occurrence of $needle in a UTF-8 string.
 * Delegates to mb_strrpos() or iconv_strrpos() when $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils']
 * selects one of them; otherwise searches on byte level and translates the position.
 *
 * @param	string		$haystack UTF-8 string to search in
 * @param	string		$needle UTF-8 string to search for
 * @return	mixed		Character position of the match, or FALSE if there is none
 */
function utf8_strrpos($haystack,$needle) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
			// The third parameter of mb_strrpos() is the offset in current PHP versions and no
			// longer accepts an encoding, so the encoding is set via the internal encoding instead
			// (same technique as in substr()), which works with every signature of the function.
		$enc = mb_internal_encoding();	// save internal encoding
		mb_internal_encoding('utf-8');
		$pos = mb_strrpos($haystack,$needle);
		mb_internal_encoding($enc);	// restore internal encoding

		return $pos;
	} elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') {
		return iconv_strrpos($haystack,$needle,'utf-8');
	}

	$byte_pos = strrpos($haystack,$needle);
	if ($byte_pos === false) return false;	// needle not found

	return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01774
/**
 * Translates a character position into a byte position for a UTF-8 string.
 * A position >= 0 is counted from the start of the string. A negative position is counted from
 * the end: the result is the byte index at which the |$pos|-th character from the end starts.
 *
 * @param	string		$str UTF-8 string
 * @param	integer		$pos Character position (negative values count from the end)
 * @return	mixed		Byte position, or FALSE if the scan reaches the end of the string (forward) or the first byte of the string (backward) before a position could be determined
 */
function utf8_char2byte_pos($str,$pos) {
	$length = strlen($str);
	$p = abs($pos);	// number of characters to count
	$n = 0;	// number of characters counted so far

	if ($pos >= 0) {
			// Count character start bytes from the beginning; continuation bytes (10xxxxxx) are skipped
		for ($i=0; $i<$length && $n<$p; $i++) {
			if ((ord($str[$i]) & 0xC0) != 0x80) {
				$n++;
			}
		}
		if ($i >= $length) return false;	// offset beyond string length

			// $i may point into the last counted character: skip its trailing bytes.
			// The result can equal strlen($str) if these are the last bytes of the string.
		while ($i < $length && (ord($str[$i]) & 0xC0) == 0x80) {
			$i++;
		}

		return $i;
	}

		// Negative position: count character start bytes from the end.
		// The loop is bounded explicitly because negative string offsets wrap around
		// to the end of the string since PHP 7.1 instead of being out of range.
	for ($i=$length-1; $i>=0 && $n<$p; $i--) {
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}
	if ($i < 0) return false;	// offset reaches to or beyond the start of the string

		// The loop stepped one byte past the start byte of the wanted character
	return $i+1;
}
01815
/**
 * Translates a byte position into a character position for a UTF-8 string.
 * Counts the character start bytes between byte 1 and $pos (inclusive); the position directly
 * behind the last byte is accepted and yields the number of characters in the string.
 *
 * @param	string		$str UTF-8 string
 * @param	integer		$pos Byte position
 * @return	mixed		Character position, or FALSE if $str is empty or $pos lies outside of the string
 */
function utf8_byte2char_pos($str,$pos) {
	$pos = (int)$pos;
	$length = strlen($str);

		// Reject positions outside the string instead of counting non-existing bytes as characters
	if ($length == 0 || $pos < 0 || $pos > $length) return false;

		// The end-of-string position counts like the start of one further character
	$n = ($pos == $length) ? 1 : 0;

	for ($i=min($pos,$length-1); $i>0; $i--) {
			// Every byte that is not a continuation byte (10xxxxxx) starts a character
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}

	return $n;
}
01838
/**
 * Maps every character of a UTF-8 string through a conversion table.
 * Characters without an entry in the table are copied unchanged.
 *
 * @param	string		$str UTF-8 string
 * @param	string		$mode 'case' (table from $this->caseFolding['utf-8']) or 'ascii' (table from $this->toASCII['utf-8']); any other value returns $str unchanged
 * @param	string		$opt For 'case' mode: key of the case folding table to use
 * @return	string		The converted string; $str unchanged if initUnicodeData() fails
 */
function utf8_char_mapping($str,$mode,$opt='') {
	if (!$this->initUnicodeData($mode)) return $str;	// do nothing

	switch($mode) {
		case 'case':
			$map =& $this->caseFolding['utf-8'][$opt];
		break;

		case 'ascii':
			$map =& $this->toASCII['utf-8'];
		break;

		default:
			return $str;
	}

	$out = '';
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = ord($str[$i]);
		if (($c & 0xC0) == 0xC0) {	// multi-byte start byte (11xxxxxx)
				// The number of leading 1-bits is the length of the sequence
			for ($bc=0; $c & 0x80; $c = $c << 1) {
				$bc++;
			}
			$mbc = substr($str,$i,$bc);
			$i += $bc-1;
		} else {
				// Single-byte character (0xxxxxxx), or a stray continuation byte (10xxxxxx) in
				// malformed input; the latter is passed through as is instead of repeating
				// the previously handled character.
			$mbc = $str[$i];
		}

		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
01884
01885
01886
01887
01888
01889
01890
01891
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905
01906
01907
01908
01909
01910
01911
01912
01913
/**
 * Cuts a string in an EUC-based charset to at most $len bytes without splitting a double-byte character.
 * For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF start a double-byte character; for all other
 * charsets every byte >= 0x80 does.
 *
 * @param	string		$str The string to shorten
 * @param	integer		$len Maximum length in bytes; values <= 0 yield ''
 * @param	string		$charset The character set of $str
 * @return	string		The shortened string ($str itself if it is not longer than $len bytes)
 */
function euc_strtrunc($str,$len,$charset) {
	if ($len <= 0) return '';

		// Decide "string fits" on the byte length, not on where the scan below ends: a double-byte
		// character straddling $len at the very end of the string must still be cut off.
	if (strlen($str) <= $len) return $str;

	$sjis = ($charset == 'shift_jis');
		// Step from character start to character start until $len is reached or passed
	for ($i=0; $i<$len; $i++) {
		$c = ord($str[$i]);
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;	// advance a double-byte char
		} else {
			if ($c >= 0x80) $i++;	// advance a double-byte char
		}
	}

	if ($i > $len) {
			// The character starting at $len-1 needs two bytes and does not fit
		return substr($str,0,$len-1);
	}

	return substr($str,0,$len);
}
01942
/**
 * Returns a portion of a string in an EUC-based charset, with $start and $len counted in characters.
 * Positions are translated to byte offsets with euc_char2byte_pos(). Edge cases are handled the
 * same way as in utf8_substr().
 *
 * @param	string		$str The string to cut from
 * @param	integer		$start Start position (character position); may be negative
 * @param	string		$charset The character set of $str
 * @param	integer		$len Length in characters; NULL means "up to the end of the string"
 * @return	mixed		The substring, or FALSE if a positive $start lies outside the string
 */
function euc_substr($str,$start,$charset,$len=null) {
		// A length of 0 must yield '' and must not be mistaken for "no length given"
	if ($len !== null && (string)$len === '0') return '';

	$byte_start = $this->euc_char2byte_pos($str,$start,$charset);
	if ($byte_start === false) {
		if ($start > 0) {
			return false;	// $start outside string length
		}
			// Zero or negative start reaching to or beyond the first character: begin at byte 0
		$byte_start = 0;
	}

	$str = substr($str,$byte_start);

		// No length given: return everything from the start position
	if ($len == null) return $str;

	$byte_end = $this->euc_char2byte_pos($str,$len,$charset);
	if ($byte_end === false) {
			// $len outside the remaining string: a negative length leaves nothing, a positive one everything
		return $len<0 ? '' : $str;
	}

	return substr($str,0,$byte_end);
}
01968
/**
 * Counts the characters of a string in an EUC-based charset.
 * For 'shift_jis' the bytes 0x80-0x9F and 0xE0-0xFF start a double-byte character; for all other
 * charsets every byte >= 0x80 does.
 *
 * @param	string		$str The string to measure
 * @param	string		$charset The character set of $str
 * @return	integer		Number of characters
 */
function euc_strlen($str,$charset) {
	$sjis = ($charset == 'shift_jis');
	$n = 0;
		// Iterate by length instead of probing $str{$i} until it comes back empty
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = ord($str[$i]);
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;	// advance a double-byte char
		} else {
			if ($c >= 0x80) $i++;	// advance a double-byte char
		}

		$n++;
	}

	return $n;
}
01995
/**
 * Translates a character position into a byte position for a string in an EUC-based charset.
 * A position >= 0 is counted from the start of the string. A negative position is counted from
 * the end: the result is the byte index at which the |$pos|-th character from the end starts.
 * NOTE(review): the backward scan classifies the byte it meets first (the trailing byte of a
 * character) with the start-byte ranges; this presumably only holds for charsets whose trailing
 * bytes are all >= 0x80 - confirm before relying on negative positions with big5 or shift_jis.
 *
 * @param	string		$str The string to scan
 * @param	integer		$pos Character position (negative values count from the end)
 * @param	string		$charset The character set of $str
 * @return	mixed		Byte position, or FALSE if the scan leaves the string before the position is reached
 */
function euc_char2byte_pos($str,$pos,$charset) {
	$sjis = ($charset == 'shift_jis');
	$length = strlen($str);
	$p = abs($pos);	// number of characters to count

	if ($pos >= 0) {
		$i = 0;
		$d = 1;
	} else {
		$i = $length-1;
		$d = -1;
	}

		// The loop is bounded explicitly on both sides because negative string offsets wrap
		// around to the end of the string since PHP 7.1 instead of being out of range.
	for ($n=0; $i>=0 && $i<$length && $n<$p; $n++, $i+=$d) {
		$c = ord($str[$i]);
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i+=$d;	// advance a double-byte char
		} else {
			if ($c >= 0x80) $i+=$d;	// advance a double-byte char
		}
	}
	if ($i < 0 || $i >= $length) return false;	// offset beyond string length

		// Scanning backwards stepped one byte past the start of the wanted character
	if ($pos < 0) $i++;

	return $i;
}
02035
/**
 * Maps every character of a string in an EUC-based charset through a conversion table.
 * Characters without an entry in the table are copied unchanged. For 'shift_jis' the bytes
 * 0x80-0x9F and 0xE0-0xFF start a double-byte character; for all other charsets every byte >= 0x80 does.
 *
 * @param	string		$str The string to convert
 * @param	string		$charset The character set of $str
 * @param	string		$mode 'case' (table from $this->caseFolding) or 'ascii' (table from $this->toASCII); any other value returns $str unchanged
 * @param	string		$opt For 'case' mode: key of the case folding table to use
 * @return	string		The converted string; $str unchanged if the table could not be initialised
 */
function euc_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;	// do nothing
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;	// do nothing
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	$sjis = ($charset == 'shift_jis');
	$out = '';
		// Iterate by length instead of probing $str{$i} until it comes back empty
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = ord($str[$i]);

		if ($sjis) {
			$isDoubleByte = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
		} else {
			$isDoubleByte = ($c >= 0x80);
		}

		if ($isDoubleByte) {
			$mbc = substr($str,$i,2);
			$i++;	// skip the second byte of the character
		} else {
			$mbc = $str[$i];
		}

		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
02090
02091 }
02092
// Load an XCLASS extension of this class if one is registered for the current TYPO3 mode.
// The configuration is read from $GLOBALS (as the class itself does) so that the lookup also
// works when this file is included from inside a function, and !empty() avoids an
// undefined-index notice when no XCLASS is registered.
if (defined('TYPO3_MODE') && !empty($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php'])) {
	include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']);
}
02096 ?>