00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00136 class t3lib_cs {
/**
 * Byte value emitted (via chr()) whenever a character cannot be mapped
 * to the target charset. 63 is the ASCII code of "?".
 */
var $noCharByteVal = 63;

/**
 * Conversion tables per charset, filled by initCharset().
 * Each entry holds a 'local' table (local code => UTF-8 string)
 * and a 'utf8' table (UTF-8 string => local code).
 */
var $parsedCharsets = array();

/**
 * Case folding tables per charset, filled by initUnicodeData() and initCaseFolding().
 */
var $caseFolding = array();

/**
 * "To ASCII" transliteration tables per charset, filled by initUnicodeData() and initToASCII().
 */
var $toASCII = array();

/**
 * Charsets handled as fixed two-bytes-per-character sets.
 */
var $twoByteSets = array(
	'ucs-2' => 1,
);

/**
 * Charsets handled as fixed four-bytes-per-character sets.
 */
var $fourByteSets = array(
	'ucs-4'  => 1,
	'utf-32' => 1,
);

/**
 * Charsets handled by the euc_* family of methods
 * (bytes above 127 start a two-byte character).
 */
var $eucBasedSets = array(
	'gb2312'    => 1,
	'big5'      => 1,
	'euc-kr'    => 1,
	'shift_jis' => 1,
);
00166
00167
00168
/**
 * Charset aliases (lowercase) mapped to the canonical charset name used
 * by this class. Looked up by parse_charset().
 *
 * The 'utf8' alias used to be listed twice; the redundant second entry
 * has been dropped (the resulting array is unchanged).
 */
var $synonyms=array(
	'us' => 'ascii',
	'us-ascii'=> 'ascii',
	'cp819' => 'iso-8859-1',
	'ibm819' => 'iso-8859-1',
	'iso-ir-100' => 'iso-8859-1',
	'iso-ir-109' => 'iso-8859-2',
	'iso-ir-148' => 'iso-8859-9',
	'iso-ir-199' => 'iso-8859-14',
	'iso-ir-203' => 'iso-8859-15',
	'csisolatin1' => 'iso-8859-1',
	'csisolatin2' => 'iso-8859-2',
	'csisolatin3' => 'iso-8859-3',
	'csisolatin5' => 'iso-8859-9',
	'csisolatin8' => 'iso-8859-14',
	'csisolatin9' => 'iso-8859-15',
	'csisolatingreek' => 'iso-8859-7',
	'iso-celtic' => 'iso-8859-14',
	'latin1' => 'iso-8859-1',
	'latin2' => 'iso-8859-2',
	'latin3' => 'iso-8859-3',
	'latin5' => 'iso-8859-9',
	'latin6' => 'iso-8859-10',
	'latin8' => 'iso-8859-14',
	'latin9' => 'iso-8859-15',
	'l1' => 'iso-8859-1',
	'l2' => 'iso-8859-2',
	'l3' => 'iso-8859-3',
	'l5' => 'iso-8859-9',
	'l6' => 'iso-8859-10',
	'l8' => 'iso-8859-14',
	'l9' => 'iso-8859-15',
	'cyrillic' => 'iso-8859-5',
	'arabic' => 'iso-8859-6',
	'tis-620' => 'iso-8859-11',
	'win874' => 'windows-874',
	'win1250' => 'windows-1250',
	'win1251' => 'windows-1251',
	'win1252' => 'windows-1252',
	'win1253' => 'windows-1253',
	'win1254' => 'windows-1254',
	'win1255' => 'windows-1255',
	'win1256' => 'windows-1256',
	'win1257' => 'windows-1257',
	'win1258' => 'windows-1258',
	'cp1250' => 'windows-1250',
	'cp1251' => 'windows-1251',
	'cp1252' => 'windows-1252',
	'ms-ee' => 'windows-1250',
	'ms-ansi' => 'windows-1252',
	'ms-greek' => 'windows-1253',
	'ms-turk' => 'windows-1254',
	'winbaltrim' => 'windows-1257',
	'koi-8ru' => 'koi-8r',
	'koi8r' => 'koi-8r',
	'cp878' => 'koi-8r',
	'mac' => 'macroman',
	'macintosh' => 'macroman',
	'euc-cn' => 'gb2312',
	'x-euc-cn' => 'gb2312',
	'euccn' => 'gb2312',
	'cp936' => 'gb2312',
	'big-5' => 'big5',
	'cp950' => 'big5',
	'eucjp' => 'euc-jp',
	'sjis' => 'shift_jis',
	'shift-jis' => 'shift_jis',
	'cp932' => 'shift_jis',
	'cp949' => 'euc-kr',
	'utf7' => 'utf-7',
	'utf8' => 'utf-8',
	'utf16' => 'utf-16',
	'utf32' => 'utf-32',
	'ucs2' => 'ucs-2',
	'ucs4' => 'ucs-4',
);
00246
00247
/**
 * Maps a language identifier (as found in the language part of a locale
 * string) to a "language family". get_locale_charset() uses the family
 * as key into $lang_to_charset_unix / $lang_to_charset_windows.
 *
 * Fixes compared to the previous table:
 *  - 'isl' pointed to the misspelt family 'west_euorpean', which exists in
 *    neither charset table.
 *  - 'rus', 'bulgarian' and 'turkish' now agree with the entries for
 *    'ru'/'russian', 'bg' and 'trk' in this same table.
 *  - 'swedish' added; the misspelt key 'svedish' is kept for backward
 *    compatibility.
 */
var $lang_to_langfamily=array(
		// two-letter language codes
	'ar' => 'arabic',
	'bg' => 'cyrillic',
	'cs' => 'east_european',
	'da' => 'west_european',
	'de' => 'west_european',
	'es' => 'west_european',
	'et' => 'estonian',
	'eu' => 'west_european',
	'fi' => 'west_european',
	'fr' => 'west_european',
	'gr' => 'greek',
	'hr' => 'east_european',
	'hu' => 'east_european',
	'iw' => 'hebrew',
	'is' => 'west_european',
	'it' => 'west_european',
	'ja' => 'japanese',
	'kl' => 'west_european',
	'ko' => 'korean',
	'lt' => 'lithuanian',
	'lv' => 'west_european',	// NOTE(review): a 'baltic' family exists in the charset tables - confirm intent
	'nl' => 'west_european',
	'no' => 'west_european',
	'pl' => 'east_european',
	'pt' => 'west_european',
	'ro' => 'east_european',
	'ru' => 'cyrillic',
	'sk' => 'east_european',
	'sl' => 'east_european',
	'sv' => 'west_european',
	'th' => 'thai',
	'uk' => 'cyrillic',
	'vi' => 'vietnamese',
	'zh' => 'chinese',

		// three-letter language identifiers
	'chs' => 'simpl_chinese',
	'cht' => 'trad_chinese',
	'csy' => 'east_european',
	'dan' => 'west_european',
	'deu' => 'west_european',
	'dea' => 'west_european',
	'des' => 'west_european',
	'ena' => 'west_european',
	'enc' => 'west_european',
	'eng' => 'west_european',
	'enz' => 'west_european',
	'enu' => 'west_european',
	'nld' => 'west_european',
	'nlb' => 'west_european',
	'fin' => 'west_european',
	'fra' => 'west_european',
	'frb' => 'west_european',
	'frc' => 'west_european',
	'frs' => 'west_european',
	'ell' => 'greek',
	'hun' => 'east_european',
	'isl' => 'west_european',
	'ita' => 'west_european',
	'its' => 'west_european',
	'jpn' => 'japanese',
	'kor' => 'korean',
	'nor' => 'west_european',
	'non' => 'west_european',
	'plk' => 'east_european',
	'ptg' => 'west_european',
	'ptb' => 'west_european',
	'rus' => 'cyrillic',
	'sky' => 'east_european',
	'esp' => 'west_european',
	'esm' => 'west_european',
	'esn' => 'west_european',
	'sve' => 'west_european',
	'trk' => 'turkish',

		// English language names
	'bulgarian' => 'cyrillic',
	'catalan' => 'west_european',
	'croatian' => 'east_european',
	'czech' => 'east_european',
	'danish' => 'west_european',
	'dutch' => 'west_european',
	'english' => 'west_european',
	'finnish' => 'west_european',
	'french' => 'west_european',
	'galician' => 'west_european',
	'german' => 'west_european',
	'hungarian' => 'east_european',
	'icelandic' => 'west_european',
	'italian' => 'west_european',
	'latvian' => 'west_european',
	'lettish' => 'west_european',
	'norwegian' => 'west_european',
	'polish' => 'east_european',
	'portuguese' => 'west_european',
	'russian' => 'cyrillic',
	'romanian' => 'east_european',
	'slovak' => 'east_european',
	'slovenian' => 'east_european',
	'spanish' => 'west_european',
	'svedish' => 'west_european',	// misspelt legacy key, kept for backward compatibility
	'swedish' => 'west_european',
	'turkish' => 'turkish',
	'ukrainian' => 'cyrillic',
);
00354
00355
/**
 * Language family => charset, used by get_locale_charset() when
 * TYPO3_OS is not 'WIN'. An empty value makes get_locale_charset()
 * fall back to 'iso-8859-1'.
 */
var $lang_to_charset_unix = array(
	'west_european' => 'iso-8859-1',
	'estonian'      => 'iso-8859-1',
	'east_european' => 'iso-8859-2',
	'baltic'        => 'iso-8859-4',
	'cyrillic'      => 'iso-8859-5',
	'arabic'        => 'iso-8859-6',
	'greek'         => 'iso-8859-7',
	'hebrew'        => 'iso-8859-8',
	'turkish'       => 'iso-8859-9',
	'thai'          => 'iso-8859-11',
	'lithuanian'    => 'iso-8859-13',
	'chinese'       => 'gb2312',
	'japanese'      => 'euc-jp',
	'korean'        => 'euc-kr',
	'simpl_chinese' => 'gb2312',
	'trad_chinese'  => 'big5',
	'vietnamese'    => '',
);

/**
 * Language family => charset, used by get_locale_charset() when
 * TYPO3_OS is 'WIN'.
 */
var $lang_to_charset_windows = array(
	'east_european' => 'windows-1250',
	'cyrillic'      => 'windows-1251',
	'west_european' => 'windows-1252',
	'greek'         => 'windows-1253',
	'turkish'       => 'windows-1254',
	'hebrew'        => 'windows-1255',
	'arabic'        => 'windows-1256',
	'baltic'        => 'windows-1257',
	'estonian'      => 'windows-1257',
	'lithuanian'    => 'windows-1257',
	'vietnamese'    => 'windows-1258',
	'thai'          => 'cp874',
	'korean'        => 'cp949',
	'chinese'       => 'gb2312',
	'japanese'      => 'shift_jis',
	'simpl_chinese' => 'gb2312',
	'trad_chinese'  => 'big5',
);

/**
 * Complete (lowercased) locale strings that map directly to a charset;
 * checked first by get_locale_charset().
 */
var $locale_to_charset = array(
	'japanese.euc' => 'euc-jp',
	'ja_jp.ujis'   => 'euc-jp',
	'korean.euc'   => 'euc-kr',
	'zh_cn'        => 'gb2312',
	'zh_hk'        => 'big5',
	'zh_tw'        => 'big5',
);
00406
00407
00408
/**
 * Charset per two-letter language key. Many entries are the empty string.
 * NOTE(review): presumably the keys are TYPO3 system language keys and an
 * empty value means "use the default charset" - confirm against callers.
 */
var $charSetArray = array(
	'dk' => '',
	'de' => '',
	'no' => '',
	'it' => '',
	'fr' => '',
	'es' => '',
	'nl' => '',
	'cz' => 'windows-1250',
	'pl' => 'iso-8859-2',
	'si' => 'windows-1250',
	'fi' => '',
	'tr' => 'iso-8859-9',
	'se' => '',
	'pt' => '',
	'ru' => 'windows-1251',
	'ro' => 'iso-8859-2',
	'ch' => 'gb2312',
	'sk' => 'windows-1250',
	'lt' => 'windows-1257',
	'is' => 'utf-8',
	'hr' => 'windows-1250',
	'hu' => 'iso-8859-2',
	'gl' => '',
	'th' => 'iso-8859-11',
	'gr' => 'iso-8859-7',
	'hk' => 'big5',
	'eu' => '',
	'bg' => 'windows-1251',
	'br' => '',
	'et' => 'iso-8859-4',
	'ar' => 'iso-8859-6',
	'he' => 'utf-8',
	'ua' => 'windows-1251',
	'jp' => 'shift_jis',
	'lv' => 'utf-8',
	'vn' => 'utf-8',
	'ca' => 'iso-8859-15',
	'ba' => 'iso-8859-2',
	'kr' => 'euc-kr',
	'eo' => 'utf-8',
	'my' => '',
	'hi' => 'utf-8',
);
00453
00454
00455
/**
 * Alternative language code per two-letter language key; uses the same
 * keys as $charSetArray.
 * NOTE(review): presumably an empty value means "same as the key" - confirm
 * against callers.
 *
 * The keys 'eo', 'my' and 'hi' exist in $charSetArray but were missing
 * here, so a lookup for them hit an undefined index. They are added with
 * an empty value, like every other language without an alternative code.
 */
var $isoArray = array(
	'dk' => 'da',
	'de' => '',
	'no' => '',
	'it' => '',
	'fr' => '',
	'es' => '',
	'nl' => '',
	'cz' => 'cs',
	'pl' => '',
	'si' => 'sl',
	'fi' => '',
	'tr' => '',
	'se' => 'sv',
	'pt' => '',
	'ru' => '',
	'ro' => '',
	'ch' => 'zh_CN',
	'sk' => '',
	'lt' => '',
	'is' => '',
	'hr' => '',
	'hu' => '',
	'gl' => '',
	'th' => '',
	'gr' => 'el',
	'hk' => 'zh_HK',
	'eu' => '',
	'bg' => '',
	'br' => 'pt_BR',
	'et' => '',
	'ar' => '',
	'he' => 'iw',
	'ua' => 'uk',
	'jp' => 'ja',
	'lv' => '',
	'vn' => 'vi',
	'ca' => '',
	'ba' => '',
	'kr' => '',
	'eo' => '',
	'my' => '',
	'hi' => '',
);
00497
/**
 * Normalizes a charset name: lowercases it and resolves known aliases
 * from $this->synonyms to their canonical name.
 *
 * @param	string		Charset name as given by the caller
 * @return	string		Lowercased, alias-resolved charset name
 */
function parse_charset($charset) {
	$name = strtolower($charset);

	return isset($this->synonyms[$name]) ? $this->synonyms[$name] : $name;
}
00511
/**
 * Determines the charset belonging to a locale string of the form
 * "language[_country][.charset][@modifier]".
 *
 * Resolution order:
 *  1. exact match of the lowercased locale in $this->locale_to_charset
 *  2. an explicit ".charset" part (normalized through parse_charset())
 *  3. the "@euro" modifier => iso-8859-15
 *  4. language => language family => OS specific charset table
 *  5. fallback 'iso-8859-1'
 *
 * Missing "@", "." or "_" parts and unknown languages no longer raise
 * undefined offset/index warnings.
 *
 * @param	string		Locale string
 * @return	string		Charset name
 */
function get_locale_charset($locale) {
	$locale = strtolower($locale);

		// Exact locale-specific charset?
	if (isset($this->locale_to_charset[$locale])) return $this->locale_to_charset[$locale];

		// Split off the modifier ("@euro"); pad so a missing part is an empty string
	list($locale,$modifier) = array_pad(explode('@',$locale),2,'');

		// Split off an explicit charset
	list($locale,$charset) = array_pad(explode('.',$locale),2,'');
	if ($charset) return $this->parse_charset($charset);

		// The euro modifier implies Latin-9
	if ($modifier == 'euro') return 'iso-8859-15';

		// Only the language part (before "_country") is relevant from here on
	$localeParts = explode('_',$locale);
	$language = $localeParts[0];
	if (isset($this->lang_to_langfamily[$language])) $language = $this->lang_to_langfamily[$language];

	if (TYPO3_OS == 'WIN') {
		$cs = isset($this->lang_to_charset_windows[$language]) ? $this->lang_to_charset_windows[$language] : '';
	} else {
		$cs = isset($this->lang_to_charset_unix[$language]) ? $this->lang_to_charset_unix[$language] : '';
	}

	return $cs ? $cs : 'iso-8859-1';
}
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
00564
00565
00566
/**
 * Converts $str from one charset to another.
 *
 * When the target is utf-8, or when no entity fallback is requested, the
 * conversion is first attempted with the extension configured in
 * $GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'] ('mbstring',
 * 'iconv' or 'recode'). If that is not configured or fails, the string is
 * converted with the built-in tables, going through UTF-8.
 *
 * Fix: the iconv() call had lost the tail of its argument list
 * ("//TRANSLIT" suffix and the input string), leaving an unterminated
 * string literal.
 *
 * @param	string		String to convert
 * @param	string		Source charset (normalized name, see parse_charset())
 * @param	string		Target charset (normalized name, see parse_charset())
 * @param	boolean		If set, characters missing in the target charset become numeric entities instead of the "no char" byte
 * @return	string		Converted string
 */
function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) {
	if ($fromCS==$toCS) return $str;

		// The PHP extensions cannot produce entities for unmappable characters,
		// so they are only used for utf-8 output or when entities are not wanted.
	if ($toCS=='utf-8' || !$useEntityForNoChar) {
		switch($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']) {
			case 'mbstring':
				$conv_str = mb_convert_encoding($str,$toCS,$fromCS);
				if (false !== $conv_str) return $conv_str;
				break;

			case 'iconv':
				$conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str);
				if (false !== $conv_str) return $conv_str;
				break;

			case 'recode':
				$conv_str = recode_string($fromCS.'..'.$toCS,$str);
				if (false !== $conv_str) return $conv_str;
				break;
		}
			// Fall through to the table based conversion
	}

	if ($fromCS!='utf-8') $str=$this->utf8_encode($str,$fromCS);
	if ($toCS!='utf-8') $str=$this->utf8_decode($str,$toCS,$useEntityForNoChar);
	return $str;
}
00605
/**
 * Converts every value of an array (recursively, in place) from one
 * charset to another using conv(). Keys are left untouched.
 *
 * @param	array		Array to convert, passed by reference
 * @param	string		Source charset
 * @param	string		Target charset
 * @param	boolean		Passed on to conv()
 * @return	void
 */
function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) {
	foreach ($array as $index => $item) {
		if (!is_array($item)) {
			$array[$index] = $this->conv($item,$fromCS,$toCS,$useEntityForNoChar);
			continue;
		}
			// Nested array: recurse on the original element so it is changed in place
		$this->convArray($array[$index],$fromCS,$toCS,$useEntityForNoChar);
	}
}
00626
/**
 * Converts a string from a local charset to UTF-8 using the conversion
 * table loaded by initCharset().
 *
 * Fixes:
 *  - For two-byte sets the character code was built with "&" instead of
 *    "|" ($ord<<8 & $ord2), which always evaluates to 0, so no
 *    character could ever be found in the table.
 *  - The curly-brace string offset ($str{...}) is replaced by substr();
 *    that syntax no longer exists in PHP 8.
 *
 * @param	string		String in the local charset
 * @param	string		Charset of $str (normalized name)
 * @return	string		UTF-8 string. Nothing (null) is returned when the charset table cannot be initialized.
 */
function utf8_encode($str,$charset) {

	if ($charset === 'utf-8') return $str;

	if ($this->initCharset($charset)) {
		$strLen = strlen($str);
		$outStr='';
		$isTwoByteSet = isset($this->twoByteSets[$charset]);
		$isEucBasedSet = isset($this->eucBasedSets[$charset]);

		for ($a=0;$a<$strLen;$a++) {
			$chr=substr($str,$a,1);
			$ord=ord($chr);
			if ($isTwoByteSet) {
					// Fixed width: combine this byte and the next one (high byte first)
				$ord2 = ord(substr($str,$a+1,1));
				$ord = ($ord << 8) | $ord2;

				if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
					$outStr.=$this->parsedCharsets[$charset]['local'][$ord];
				} else $outStr.=chr($this->noCharByteVal);
				$a++;
			} elseif ($ord>127) {
				if ($isEucBasedSet) {
						// In shift_jis the bytes 0xA0-0xDF are single-byte characters
					if ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) {
						$a++;
						$ord2=ord(substr($str,$a,1));
						$ord = $ord*256+$ord2;
					}
				}

				if (isset($this->parsedCharsets[$charset]['local'][$ord])) {
					$outStr.= $this->parsedCharsets[$charset]['local'][$ord];
				} else $outStr.= chr($this->noCharByteVal);
			} else $outStr.= $chr;	// 7-bit bytes are passed through unchanged
		}
		return $outStr;
	}
}
00671
/**
 * Converts a UTF-8 string to a local charset using the conversion table
 * loaded by initCharset().
 *
 * Characters without an entry in the table become either a hexadecimal
 * numeric entity (if $useEntityForNoChar is set) or the "no char" byte.
 * A continuation byte found where a lead byte is expected also becomes
 * the "no char" byte.
 *
 * @param	string		UTF-8 string
 * @param	string		Target charset (normalized name)
 * @param	boolean		If set, unmappable characters are output as numeric entities
 * @return	string		String in the target charset. Nothing (null) is returned when the charset table cannot be initialized.
 */
function utf8_decode($str,$charset,$useEntityForNoChar=0) {
	if (!$this->initCharset($charset)) return;

	$total = strlen($str);
	$result = '';
	$pos = 0;
	while ($pos < $total) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
				// 7-bit byte: copied as is
			$result .= $byte;
		} elseif (!($code & 64)) {
				// 10xxxxxx without a preceding lead byte
			$result .= chr($this->noCharByteVal);
		} else {
				// Lead byte: each further leading 1-bit announces one more byte
			$seq = $byte;
			for ($n=0; $n<8; $n++) {
				$code = $code << 1;
				if (!($code & 128)) break;
				$pos++;
				$seq .= substr($str,$pos,1);
			}

			if (isset($this->parsedCharsets[$charset]['utf8'][$seq])) {
				$local = $this->parsedCharsets[$charset]['utf8'][$seq];
				if ($local > 255) {
						// Two-byte local code, high byte first
					$result .= chr(($local >> 8) & 255).chr($local & 255);
				} else {
					$result .= chr($local);
				}
			} elseif ($useEntityForNoChar) {
				$result .= '&#'.$this->utf8CharToUnumber($seq,1).';';
			} else {
				$result .= chr($this->noCharByteVal);
			}
		}
		$pos++;
	}

	return $result;
}
00716
/**
 * Replaces every multibyte UTF-8 character in a string by a hexadecimal
 * numeric entity ("&#x...;"). 7-bit bytes are copied unchanged; a
 * continuation byte found where a lead byte is expected becomes the
 * "no char" byte.
 *
 * @param	string		UTF-8 string
 * @return	string		String with multibyte characters as entities
 */
function utf8_to_entities($str) {
	$total = strlen($str);
	$result = '';
	$pos = 0;

	while ($pos < $total) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
			$result .= $byte;
		} elseif (!($code & 64)) {
			$result .= chr($this->noCharByteVal);
		} else {
				// Collect the continuation bytes announced by the lead byte
			$seq = $byte;
			for ($n=0; $n<8; $n++) {
				$code = $code << 1;
				if (!($code & 128)) break;
				$pos++;
				$seq .= substr($str,$pos,1);
			}
			$result .= '&#'.$this->utf8CharToUnumber($seq,1).';';
		}
		$pos++;
	}

	return $result;
}
00748
/**
 * Converts numeric entities ("&#123;", "&#x7b;") in a string to UTF-8
 * characters, and optionally named HTML entities as well.
 *
 * Fixes:
 *  - ereg_replace() (removed in PHP 7) plus the random-token explode is
 *    replaced by a single preg_split() with PREG_SPLIT_DELIM_CAPTURE,
 *    which yields the same alternating "text, entity name, text, ..." list.
 *  - The named-entity lookup is guarded with isset() instead of reading
 *    a possibly undefined index.
 *
 * NOTE(review): the named-entity values are passed through
 * utf8_encode(..., 'iso-8859-1'), i.e. get_html_translation_table() is
 * assumed to return ISO-8859-1 bytes - confirm for the PHP versions in use.
 *
 * @param	string		Input string
 * @param	boolean		If set, named entities known to get_html_translation_table(HTML_ENTITIES) are converted too
 * @return	string		String with entities replaced by UTF-8 characters
 */
function entities_to_utf8($str,$alsoStdHtmlEnt=0) {
	$trans_tbl = array();
	if ($alsoStdHtmlEnt) {
			// character => entity, flipped to entity => character
		$trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));
	}

		// Even indexes hold plain text, odd indexes the entity name without "&" and ";"
	$parts = preg_split('/&([#[:alnum:]]*);/',$str,-1,PREG_SPLIT_DELIM_CAPTURE);
	foreach($parts as $k => $v) {
		if ($k%2) {
			if (substr($v,0,1)=='#') {	// numeric entity
				if (substr($v,1,1)=='x') {
					$parts[$k] = $this->UnumberToChar(hexdec(substr($v,2)));
				} else {
					$parts[$k] = $this->UnumberToChar(substr($v,1));
				}
			} elseif ($alsoStdHtmlEnt && isset($trans_tbl['&'.$v.';'])) {	// known named entity
				$parts[$k] = $this->utf8_encode($trans_tbl['&'.$v.';'],'iso-8859-1');
			} else {	// unknown: restore the entity unchanged
				$parts[$k] ='&'.$v.';';
			}
		}
	}

	return implode('',$parts);
}
00781
/**
 * Splits a UTF-8 string into an array with one element per character.
 * Each element is the character's number as returned by
 * utf8CharToUnumber(), or the character itself if $retChar is set.
 * A continuation byte found where a lead byte is expected is reported
 * as the "no char" value.
 *
 * @param	string		UTF-8 string
 * @param	boolean		If set, entities are converted first using entities_to_utf8($str,1)
 * @param	boolean		If set, the characters themselves are returned instead of numbers
 * @return	array		List of numbers or characters
 */
function utf8_to_numberarray($str,$convEntities=0,$retChar=0) {
	if ($convEntities) {
		$str = $this->entities_to_utf8($str,1);
	}

	$total = strlen($str);
	$result = array();
	$pos = 0;

	while ($pos < $total) {
		$byte = substr($str,$pos,1);
		$code = ord($byte);

		if ($code <= 127) {
			$result[] = $retChar ? chr($code) : $code;
		} elseif (!($code & 64)) {
			$result[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal;
		} else {
				// Collect the continuation bytes announced by the lead byte
			$seq = $byte;
			for ($n=0; $n<8; $n++) {
				$code = $code << 1;
				if (!($code & 128)) break;
				$pos++;
				$seq .= substr($str,$pos,1);
			}
			$result[] = $retChar ? $seq : $this->utf8CharToUnumber($seq);
		}
		$pos++;
	}

	return $result;
}
00820
/**
 * Converts a character number to its UTF-8 byte sequence.
 * Numbers below 0x80 give one byte; larger numbers give two to six
 * bytes (lead byte plus 6-bit continuation bytes). Numbers of
 * 0x80000000 and above give the "no char" byte.
 *
 * @param	integer		Character number
 * @return	string		UTF-8 byte sequence
 */
function UnumberToChar($cbyte) {
		// Single byte
	if ($cbyte < 0x80) {
		return chr($cbyte);
	}

		// Pick the lead byte marker and the number of continuation bytes
	if ($cbyte < 0x800) {
		$leadMark = 0xC0;
		$tailCount = 1;
	} elseif ($cbyte < 0x10000) {
		$leadMark = 0xE0;
		$tailCount = 2;
	} elseif ($cbyte < 0x200000) {
		$leadMark = 0xF0;
		$tailCount = 3;
	} elseif ($cbyte < 0x4000000) {
		$leadMark = 0xF8;
		$tailCount = 4;
	} elseif ($cbyte < 0x80000000) {
		$leadMark = 0xFC;
		$tailCount = 5;
	} else {
		return chr($this->noCharByteVal);
	}

		// The lead byte carries the bits above the continuation bytes
	$bytes = chr($leadMark | ($cbyte >> (6 * $tailCount)));

		// Each continuation byte carries six bits, most significant group first
	for ($shift = 6 * ($tailCount - 1); $shift >= 0; $shift -= 6) {
		$bytes .= chr(0x80 | (($cbyte >> $shift) & 0x3F));
	}

	return $bytes;
}
00875
/**
 * Converts one UTF-8 character (byte sequence) to its character number.
 * The number is assembled as a string of binary digits: the payload bits
 * of the lead byte followed by the low six bits of each continuation byte.
 *
 * @param	string		UTF-8 byte sequence of one character
 * @param	boolean		If set, the result is returned as "x" followed by the lowercase hexadecimal number
 * @return	mixed		Character number (integer), or string "x<hex>" if $hex is set
 */
function utf8CharToUnumber($str,$hex=0) {
	$lead = ord(substr($str,0,1));

	if (($lead & 192) != 192) {
			// Not a multibyte lead byte: the byte value is the number
		$number = $lead;
	} else {
		$shifted = $lead;
		$tailBits = '';
		$count = 0;
			// Every further leading 1-bit of the lead byte announces one continuation byte
		while ($count < 8) {
			$shifted = $shifted << 1;
			if (!($shifted & 128)) break;
			$tailBits .= substr('00000000'.decbin(ord(substr($str,$count+1,1))),-6);
			$count++;
		}
			// Payload bits of the lead byte: 6 minus the number of continuation bytes
		$leadBits = substr('00000000'.decbin($lead),-(6-$count));

		$number = bindec($leadBits.$tailBits);
	}

	return $hex ? 'x'.dechex($number) : $number;
}
00903
00904
00905
00906
00907
00908
00909
00910
00911
00912
00913
00914
00915
00916
00917
/**
 * Loads the conversion table for a charset into
 * $this->parsedCharsets[$charset], with the sub-tables
 *   'local': local character code => UTF-8 string
 *   'utf8' : UTF-8 string => local character code
 * Only local codes above 127 are stored.
 *
 * The table is read from a serialized cache file in typo3temp/cs/ if
 * present; otherwise the source file PATH_t3lib.'csconvtbl/<charset>.tbl'
 * is parsed and the cache file is written. Two line formats are
 * recognized, detected from the first data line: whitespace separated
 * "0xNN 0xNNNN ..." and token separated "NN=U+NNNN" / "NN:U+NNNN".
 *
 * Fixes: ereg()/split() (removed in PHP 7) are replaced by
 * preg_match()/preg_split() with the same patterns, and the "already
 * loaded" check no longer reads an undefined index.
 *
 * @param	string		Charset name (normalized)
 * @return	integer		1 if the table was already loaded, 2 if it was loaded now, false if no table file exists for the charset
 */
function initCharset($charset) {

		// Already loaded during this request
	if (isset($this->parsedCharsets[$charset]) && is_array($this->parsedCharsets[$charset])) return 1;

		// Conversion table source file
	$charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl';
	if (!($charset && t3lib_div::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile))) return false;

		// Use the cached, already parsed table if available
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$this->parsedCharsets[$charset]=unserialize(t3lib_div::getUrl($cacheFile));
		return 2;
	}

		// Parse the source file
	$lines=t3lib_div::trimExplode(chr(10),t3lib_div::getUrl($charsetConvTableFile),1);
	$this->parsedCharsets[$charset]=array('local'=>array(),'utf8'=>array());

	$whitespacedPattern = '/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/';
	$detectedType='';
	foreach($lines as $value) {
		if (trim($value) && substr($value,0,1)!='#') {	// skip empty lines and comments

				// The first data line decides the format of the whole file
			if (!$detectedType) $detectedType = preg_match($whitespacedPattern,$value) ? 'whitespaced' : 'ms-token';

			$hexbyte = '';
			$utf8 = '';
			if ($detectedType=='ms-token') {
				$fields = preg_split('/=|:/',$value,3);
				$hexbyte = $fields[0];
				$utf8 = isset($fields[1]) ? $fields[1] : '';
			} elseif ($detectedType=='whitespaced') {
				$regA=array();
				if (preg_match($whitespacedPattern,$value,$regA)) {
					$hexbyte = $regA[1];
					$utf8 = 'U+'.$regA[2];
				}
			}

			$decval = hexdec(trim($hexbyte));
			if ($decval>127) {
					// $utf8 has the form "U+NNNN"; strip the two-character prefix
				$utf8decval = hexdec(substr(trim($utf8),2));
				$this->parsedCharsets[$charset]['local'][$decval]=$this->UnumberToChar($utf8decval);
				$this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]]=$decval;
			}
		}
	}

	if ($cacheFile) {
		t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->parsedCharsets[$charset]));
	}

	return 2;
}
00980
/**
 * Builds the UTF-8 case folding table ($this->caseFolding['utf-8']) and
 * the UTF-8 "to ASCII" table ($this->toASCII['utf-8']).
 *
 * If $mode is 'case' or 'ascii' and the respective table is already in
 * memory or available as cache file in typo3temp/cs/, only that table is
 * (re)used. Otherwise both tables are generated from
 * PATH_t3lib.'unidata/UnicodeData.txt', optionally refined by
 * 'unidata/SpecialCasing.txt' and 'unidata/Translit.txt', and both cache
 * files are written. Only characters up to U+FFFF are processed.
 *
 * Fixes:
 *  - ereg()/split() (removed in PHP 7) and the curly-brace string offset
 *    (removed in PHP 8) are replaced by preg_match()/explode()/substr().
 *  - The false/empty line delivered by fgets() at the end of a file is
 *    skipped instead of being parsed as a data line.
 *  - The "U+" prefix of the internal keys is stripped before hexdec()
 *    instead of relying on hexdec() ignoring invalid characters.
 *  - The "already loaded" checks no longer read an undefined index.
 *
 * @param	string		'case', 'ascii' or null (always generate)
 * @return	integer		1 if the requested table was already in memory, 2 if it was read from cache, 3 if the tables were generated, false if the Unicode data file is not available
 */
function initUnicodeData($mode=null) {
		// Cache files
	$cacheFileCase = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_utf-8.tbl');
	$cacheFileASCII = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_utf-8.tbl');

		// Only the requested table is needed: use memory or cache if possible
	switch($mode) {
		case 'case':
			if (isset($this->caseFolding['utf-8']) && is_array($this->caseFolding['utf-8'])) return 1;

			if ($cacheFileCase && @is_file($cacheFileCase)) {
				$this->caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase));
				return 2;
			}
			break;

		case 'ascii':
			if (isset($this->toASCII['utf-8']) && is_array($this->toASCII['utf-8'])) return 1;

			if ($cacheFileASCII && @is_file($cacheFileASCII)) {
				$this->toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII));
				return 2;
			}
			break;
	}

		// Process the main Unicode data file
	$unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt';
	if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) return false;

	$fh = fopen($unicodeDataFile,'rb');
	if (!$fh) return false;

		// Case folding tables: UTF-8 character => UTF-8 character(s)
	$this->caseFolding['utf-8'] = array();
	$utf8CaseFolding =& $this->caseFolding['utf-8'];
	$utf8CaseFolding['toUpper'] = array();
	$utf8CaseFolding['toLower'] = array();
	$utf8CaseFolding['toTitle'] = array();

	$decomposition = array();	// "U+XXXX" => list of hex code values
	$mark = array();			// "U+XXXX" => 1 for characters of a category starting with "M"
	$number = array();			// "U+XXXX" => numeric value for categories starting with "N"
	$omit = array();			// "U+XXXX" => 1 for characters Translit.txt maps to nothing

	while (!feof($fh)) {
		$line = fgets($fh,4096);
		if ($line === false || trim($line) == '') continue;

			// Semicolon separated fields; only some of the first 15 are used
		$fields = array_pad(explode(';', rtrim($line)), 15, '');
		$char = $fields[0];
		$name = $fields[1];
		$cat = $fields[2];
		$decomp = $fields[5];
		$num = $fields[8];
		$upper = $fields[12];
		$lower = $fields[13];
		$title = $fields[14];

		$ord = hexdec($char);
		if ($ord > 0xFFFF) break;	// only the range up to U+FFFF is handled

		$utf8_char = $this->UnumberToChar($ord);

		if ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper));
		if ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower));
			// Title case is only stored when it differs from upper case
		if ($title && $title != $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title));

		switch (substr($cat,0,1)) {
			case 'M':
				$mark["U+$char"] = 1;
				break;

			case 'N':
				if ($ord > 0x80 && $num != '') $number["U+$char"] = $num;
		}

			// Latin letters "WITH" something but without a decomposition: derive the base letter from the name
		$match = array();
		if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/',$name,$match) && !$decomp) {
			$c = ord($match[2]);
			if ($match[1] == 'SMALL') $c += 32;

			$decomposition["U+$char"] = array(dechex($c));
			continue;
		}

		$match = array();
		if (preg_match('/(<.*>)? *(.+)/',$decomp,$match)) {
			switch($match[1]) {
				case '<circle>':	// wrap in parentheses
					$match[2] = '0028 '.$match[2].' 0029';
					break;

				case '<square>':	// wrap in square brackets
					$match[2] = '005B '.$match[2].' 005D';
					break;

				case '<compat>':	// ignore decompositions that start with a space
					if (preg_match('/^0020 /',$match[2])) continue 2;
					break;

					// ignore positional and vertical forms
				case '<initial>':
				case '<medial>':
				case '<final>':
				case '<isolated>':
				case '<vertical>':
					continue 2;
			}
			$decomposition["U+$char"] = explode(' ',$match[2]);
		}
	}
	fclose($fh);

		// Process additional casing rules; entries with a condition are ignored
	$specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt';
	if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) {
		$fh = fopen($specialCasingFile,'rb');
		if ($fh) {
			while (!feof($fh)) {
				$line = fgets($fh,4096);
				if ($line === false) continue;
				if (substr($line,0,1) != '#' && trim($line) != '') {

					list($char,$lower,$title,$upper,$cond) = array_pad(t3lib_div::trimExplode(';', $line), 5, '');
					if ($cond == '' || substr($cond,0,1) == '#') {
						$utf8_char = $this->UnumberToChar(hexdec($char));
						if ($char != $lower) {
							$arr = explode(' ',$lower);
							foreach ($arr as $i => $hexValue) $arr[$i] = $this->UnumberToChar(hexdec($hexValue));
							$utf8CaseFolding['toLower'][$utf8_char] = implode('',$arr);
						}
						if ($char != $title && $title != $upper) {
							$arr = explode(' ',$title);
							foreach ($arr as $i => $hexValue) $arr[$i] = $this->UnumberToChar(hexdec($hexValue));
							$utf8CaseFolding['toTitle'][$utf8_char] = implode('',$arr);
						}
						if ($char != $upper) {
							$arr = explode(' ',$upper);
							foreach ($arr as $i => $hexValue) $arr[$i] = $this->UnumberToChar(hexdec($hexValue));
							$utf8CaseFolding['toUpper'][$utf8_char] = implode('',$arr);
						}
					}
				}
			}
			fclose($fh);
		}
	}

		// Process custom transliterations; they override derived decompositions
	$customTranslitFile = PATH_t3lib.'unidata/Translit.txt';
	if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) {
		$fh = fopen($customTranslitFile,'rb');
		if ($fh) {
			while (!feof($fh)) {
				$line = fgets($fh,4096);
				if ($line === false) continue;
				if (substr($line,0,1) != '#' && trim($line) != '') {
					list($char,$translit) = array_pad(t3lib_div::trimExplode(';', $line), 2, '');
					if (!$translit) $omit["U+$char"] = 1;
					$decomposition["U+$char"] = explode(' ', $translit);
				}
			}
			fclose($fh);
		}
	}

		// Decompose and remove marks; inspired by unac (Loic Dachary <loic@senga.org>)
	foreach($decomposition as $from => $to) {
		$code_decomp = array();

		while ($code_value = array_shift($to)) {
			if (isset($decomposition["U+$code_value"])) {
					// Expand nested decompositions in place, keeping their order
				foreach(array_reverse($decomposition["U+$code_value"]) as $cv) {
					array_unshift($to, $cv);
				}
			} elseif (!isset($mark["U+$code_value"])) {	// drop marks
				array_push($code_decomp, $code_value);
			}
		}
		if (count($code_decomp) || isset($omit[$from])) {
			$decomposition[$from] = $code_decomp;
		} else {
			unset($decomposition[$from]);
		}
	}

		// Create the ASCII-only mapping
	$this->toASCII['utf-8'] = array();
	$ascii =& $this->toASCII['utf-8'];

	foreach($decomposition as $from => $to) {
		$code_decomp = array();
		while ($code_value = array_shift($to)) {
			$ord = hexdec($code_value);
			if ($ord > 127) {
				continue 2;	// decomposition contains a non-ASCII character: skip this character entirely
			} else {
				array_push($code_decomp,chr($ord));
			}
		}
			// $from has the form "U+XXXX"
		$ascii[$this->UnumberToChar(hexdec(substr($from,2)))] = implode('',$code_decomp);
	}

		// Add numeric values for characters that got no transliteration
	foreach($number as $from => $to) {
		$utf8_char = $this->UnumberToChar(hexdec(substr($from,2)));
		if (!isset($ascii[$utf8_char])) {
			$ascii[$utf8_char] = $to;
		}
	}

	if ($cacheFileCase) {
		t3lib_div::writeFileToTypo3tempDir($cacheFileCase,serialize($utf8CaseFolding));
	}

	if ($cacheFileASCII) {
		t3lib_div::writeFileToTypo3tempDir($cacheFileASCII,serialize($ascii));
	}

	return 3;
}
01206
/**
 * Builds the case folding table for a local charset
 * ($this->caseFolding[$charset] with the sub-tables 'toUpper', 'toLower'
 * and 'toTitle') by translating the UTF-8 case folding table through the
 * charset's conversion table. The result is cached in typo3temp/cs/.
 *
 * Fixes:
 *  - Characters without a case mapping no longer trigger undefined-index
 *    reads that pass null to utf8_decode(); they are skipped, which gives
 *    the same table as before.
 *  - The three sub-tables are always initialized as arrays, so 'toTitle'
 *    exists even when no title-case mapping applies.
 *
 * @param	string		Charset name (normalized)
 * @return	integer		1 if the table was already in memory, 2 if it was read from cache, 3 if it was generated, false if the charset or the Unicode data is not available
 */
function initCaseFolding($charset) {
		// Already in memory
	if (isset($this->caseFolding[$charset]) && is_array($this->caseFolding[$charset])) return 1;

		// Use the cached version if possible
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$this->caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
		return 2;
	}

		// The charset conversion table is required
	if (!$this->initCharset($charset)) {
		return false;
	}

		// So is the UTF-8 case folding table
	if (!$this->initUnicodeData('case')) {
		return false;
	}

	$this->caseFolding[$charset] = array(
		'toUpper' => array(),
		'toLower' => array(),
		'toTitle' => array(),
	);

	$nochar = chr($this->noCharByteVal);
	$foldingTypes = array('toUpper','toLower','toTitle');
	foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
			// Reconvert to the local charset
		$c = $this->utf8_decode($utf8, $charset);

		foreach ($foldingTypes as $type) {
			if (!isset($this->caseFolding['utf-8'][$type][$utf8])) continue;

				// Keep the mapping only if the result exists in the local charset
			$cc = $this->utf8_decode($this->caseFolding['utf-8'][$type][$utf8], $charset);
			if ($cc != '' && $cc != $nochar) $this->caseFolding[$charset][$type][$c] = $cc;
		}
	}

		// Add the ASCII case folding (the charset table only covers codes above 127)
	for ($i=ord('a'); $i<=ord('z'); $i++) {
		$this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32);
	}
	for ($i=ord('A'); $i<=ord('Z'); $i++) {
		$this->caseFolding[$charset]['toLower'][chr($i)] = chr($i+32);
	}

	if ($cacheFile) {
		t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->caseFolding[$charset]));
	}

	return 3;
}
01268
/**
 * Builds the "to ASCII" transliteration table for a local charset
 * ($this->toASCII[$charset]) by translating the UTF-8 table through the
 * charset's conversion table. The result is cached in typo3temp/cs/.
 *
 * Fix: the table is now initialized as an empty array before it is
 * filled. Previously, a charset without any transliterable character
 * left it undefined, so an undefined value was serialized to the cache
 * and the "already in memory" check failed on every later call.
 * The unused locals were removed.
 *
 * @param	string		Charset name (normalized)
 * @return	integer		1 if the table was already in memory, 2 if it was read from cache, 3 if it was generated, false if the charset or the Unicode data is not available
 */
function initToASCII($charset) {
		// Already in memory
	if (isset($this->toASCII[$charset]) && is_array($this->toASCII[$charset])) return 1;

		// Use the cached version if possible
	$cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_'.$charset.'.tbl');
	if ($cacheFile && @is_file($cacheFile)) {
		$this->toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile));
		return 2;
	}

		// The charset conversion table is required
	if (!$this->initCharset($charset)) {
		return false;
	}

		// So is the UTF-8 "to ASCII" table
	if (!$this->initUnicodeData('ascii')) {
		return false;
	}

	$this->toASCII[$charset] = array();
	foreach ($this->parsedCharsets[$charset]['local'] as $utf8) {
			// Reconvert to the local charset
		$c = $this->utf8_decode($utf8, $charset);

		if (isset($this->toASCII['utf-8'][$utf8])) {
			$this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8];
		}
	}

	if ($cacheFile) {
		t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->toASCII[$charset]));
	}

	return 3;
}
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323
01324
01325
01326
01327
01328
01329
01330
01331
01332
01333
01334
01335
/**
 * Charset-aware substr(): $start and $len count characters, not bytes.
 *
 * Fixes:
 *  - The mbstring branch always used 'utf-8' and ignored $charset, so
 *    strings in any other charset were cut at wrong positions.
 *  - For two- and four-byte sets a null $len ("up to the end") was
 *    multiplied into 0 and produced an empty string.
 *  - The charset set lookups use isset(), as conv_case() does.
 *
 * @param	string		Charset of $string (normalized name)
 * @param	string		String to cut
 * @param	integer		Start position, in characters
 * @param	integer		Length, in characters; null means "up to the end"
 * @return	string		The substring
 */
function substr($charset,$string,$start,$len=null) {
	if ($len===0) return '';

	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
			// Without a length the charset argument cannot be passed positionally,
			// so the internal encoding is switched temporarily
		if ($len==null) {
			$enc = mb_internal_encoding();	// save
			mb_internal_encoding($charset);
			$str = mb_substr($string,$start);
			mb_internal_encoding($enc);	// restore

			return $str;
		}
		return mb_substr($string,$start,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_substr($string,$start,$len);
	} elseif (isset($this->eucBasedSets[$charset])) {
		return $this->euc_substr($string,$start,$charset,$len);
	} elseif (isset($this->twoByteSets[$charset])) {
		return $len === null ? substr($string,$start*2) : substr($string,$start*2,$len*2);
	} elseif (isset($this->fourByteSets[$charset])) {
		return $len === null ? substr($string,$start*4) : substr($string,$start*4,$len*4);
	}

		// Single-byte charset: one byte per character
	return $len === null ? substr($string,$start) : substr($string,$start,$len);
}
01375
/**
 * Charset-aware strlen(): returns the number of characters in a string.
 *
 * Fix: the charset set lookups use isset() (as conv_case() does), so a
 * charset that is in none of the sets - i.e. every single-byte charset -
 * no longer raises undefined-index warnings.
 *
 * @param	string		Charset of $string (normalized name)
 * @param	string		String to measure
 * @return	integer		Number of characters. For two- and four-byte sets this is the byte length divided by 2 resp. 4.
 */
function strlen($charset,$string) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return mb_strlen($string,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_strlen($string);
	} elseif (isset($this->eucBasedSets[$charset])) {
		return $this->euc_strlen($string,$charset);
	} elseif (isset($this->twoByteSets[$charset])) {
		return strlen($string)/2;
	} elseif (isset($this->fourByteSets[$charset])) {
		return strlen($string)/4;
	}

		// Single-byte charset: one byte per character
	return strlen($string);
}
01401
/**
 * Crops a string to a number of characters and adds a marker where
 * something was cut off.
 *
 * A positive $len keeps the first $len characters and appends $crop; a
 * negative $len keeps the last -$len characters and prepends $crop. The
 * string is returned unchanged if $len is 0 or nothing has to be cut.
 *
 * Fixes: the curly-brace string offsets ($string{$i}), removed in PHP 8,
 * are replaced by explicit bounds checks with the same meaning ("is
 * there a byte at this position?"), and the eucBasedSets lookup uses
 * isset().
 *
 * @param	string		Charset of $string (normalized name)
 * @param	string		String to crop
 * @param	integer		Number of characters to keep (negative: counted from the end)
 * @param	string		Marker added at the cut position
 * @return	string		The cropped string
 */
function crop($charset,$string,$len,$crop='') {
	if (intval($len) == 0) return $string;

		// Translate the character position into a byte position
	if ($charset == 'utf-8') {
		$i = $this->utf8_char2byte_pos($string,$len);
	} elseif (isset($this->eucBasedSets[$charset])) {
		$i = $this->euc_char2byte_pos($string,$len,$charset);
	} else {
		if ($len > 0) {
			$i = $len;
		} else {
			$i = strlen($string)+$len;
			if ($i<=0) $i = false;
		}
	}

		// No valid position: nothing to crop
	if ($i === false) return $string;

	$byteLen = strlen($string);
	if ($len > 0) {
			// Only crop if there is something after the cut position
		if ($i >= 0 && $i < $byteLen) {
			return substr($string,0,$i).$crop;
		}
	} else {
			// Only crop if there is something before the cut position
		if ($i >= 1 && $i <= $byteLen) {
			return $crop.substr($string,$i);
		}
	}

	return $string;
}
01456
/**
 * Truncates a string to at most $len BYTES without cutting a multibyte
 * character in half.
 *
 * Fixes:
 *  - The call to euc_strtrunc() did not pass $len at all, so the
 *    requested length never reached it. The argument order
 *    (string, length, charset) follows the euc_char2byte_pos() call in
 *    crop(). NOTE(review): euc_strtrunc() itself is outside the reviewed
 *    section - confirm its signature.
 *  - The charset set lookups use isset().
 *
 * @param	string		Charset of $string (normalized name)
 * @param	string		String to truncate
 * @param	integer		Maximum length in bytes
 * @return	string		The truncated string; empty if $len is 0 or negative
 */
function strtrunc($charset,$string,$len) {
	if ($len <= 0) return '';

	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		return mb_strcut($string,0,$len,$charset);
	} elseif ($charset == 'utf-8') {
		return $this->utf8_strtrunc($string,$len);
	} elseif (isset($this->eucBasedSets[$charset])) {
		return $this->euc_strtrunc($string,$len,$charset);
	} elseif (isset($this->twoByteSets[$charset])) {
			// Round down to a whole number of two-byte characters
		if ($len % 2) $len--;
	} elseif (isset($this->fourByteSets[$charset])) {
			// Round down to a whole number of four-byte characters
		$x = $len % 4;
		$len -= $x;
	}

		// Fixed-width and single-byte charsets: plain byte cut
	return substr($string,0,$len);
}
01485
/**
 * Converts a string to lower or upper case in the given charset.
 *
 * Uses mb_strtolower()/mb_strtoupper() when
 * $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is 'mbstring' and PHP is at least
 * 4.3; otherwise one of the class' own mapping routines, chosen by charset.
 *
 * @param	string		Charset of $string
 * @param	string		String to convert
 * @param	string		'toLower' for lower case; any other value takes the upper-case branch when mbstring is used
 * @return	string		The converted string
 */
function conv_case($charset,$string,$case) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring' && (float)phpversion() >= 4.3) {
		// Convert in the string's own charset. Hard-coding 'utf-8' here made
		// mbstring misread every string in another encoding.
		if ($case == 'toLower') {
			return mb_strtolower($string,$charset);
		}
		return mb_strtoupper($string,$charset);
	}

	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'case',$case);
	}
	if (isset($this->eucBasedSets[$charset])) {
		return $this->euc_char_mapping($string,$charset,'case',$case);
	}

	// Everything else is handled as a single-byte encoding.
	return $this->sb_char_mapping($string,$charset,'case',$case);
}
01519
/**
 * Maps the special characters of a string to ASCII equivalents.
 *
 * Hands the work to the mapping routine that matches the charset: UTF-8,
 * one of the sets listed in $eucBasedSets, or single-byte for all others.
 *
 * @param	string		Charset of $string
 * @param	string		String to convert
 * @return	string		Result of the selected mapping routine in 'ascii' mode
 */
function specCharsToASCII($charset,$string) {
	if ($charset == 'utf-8') {
		return $this->utf8_char_mapping($string,'ascii');
	}

	$isEucBased = isset($this->eucBasedSets[$charset]);

	return $isEucBased
		? $this->euc_char_mapping($string,$charset,'ascii')
		: $this->sb_char_mapping($string,$charset,'ascii');
}
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
/**
 * Maps every byte of a single-byte encoded string through a lookup table.
 *
 * The table is $this->caseFolding[$charset][$opt] in 'case' mode and
 * $this->toASCII[$charset] in 'ascii' mode. Bytes without an entry are copied
 * unchanged. The input is returned untouched if the mode is unknown or the
 * matching init method returns a false value.
 *
 * @param	string		String to convert
 * @param	string		Charset of $str
 * @param	string		Mapping mode: 'case' or 'ascii'
 * @param	string		Sub-key into the case folding table; only used in 'case' mode
 * @return	string		The converted string
 */
function sb_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	// Bounded loop with [] offsets: the {} offset syntax is gone in PHP 8, and
	// the loop no longer reads one byte past the end to find out where to stop.
	$out = '';
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = $str[$i];
		$out .= isset($map[$c]) ? $map[$c] : $c;
	}

	return $out;
}
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610
/**
 * Returns part of a UTF-8 string, with start and length counted in characters.
 *
 * Character positions are translated to byte positions with
 * utf8_char2byte_pos(); the actual cutting is done with substr().
 *
 * @param	string		UTF-8 string
 * @param	integer		Start position in characters; negative counts from the end
 * @param	integer		Length in characters; negative leaves that many characters off the end; null means "to the end"
 * @return	string		The substring, or false if a positive $start lies outside the string
 */
function utf8_substr($str,$start,$len=null) {
	// A length of 0 or '0' yields an empty string. null is tested first so it
	// is never passed to strcmp().
	if ($len !== null && !strcmp($len,'0')) return '';

	$byte_start = $this->utf8_char2byte_pos($str,$start);
	if ($byte_start === false) {
		if ($start > 0) {
			return false;	// $start lies outside the string
		}
		// Zero or negative start that cannot be resolved: begin at the first
		// byte. The original set $start here and relied on false acting as 0.
		$byte_start = 0;
	}

	$str = substr($str,$byte_start);

	if ($len != null) {
		$byte_end = $this->utf8_char2byte_pos($str,$len);
		if ($byte_end === false) {
			// $len reaches outside the remaining string: nothing is left for
			// a negative length, everything for a positive one.
			return $len < 0 ? '' : $str;
		}
		return substr($str,0,$byte_end);
	}

	return $str;
}
01645
/**
 * Counts the characters of a UTF-8 string.
 *
 * Every byte that is not a continuation byte (10xxxxxx) starts a character,
 * so ASCII bytes and multi-byte lead bytes are counted.
 *
 * @param	string		UTF-8 string
 * @return	integer		Number of characters
 */
function utf8_strlen($str) {
	$count = 0;
	$length = strlen($str);

	// Bounded loop with [] offsets: the {} offset syntax is gone in PHP 8, and
	// the loop no longer reads one byte past the end to find out where to stop.
	for ($i=0; $i<$length; $i++) {
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$count++;
		}
	}

	return $count;
}
01666
/**
 * Truncates a UTF-8 string to at most $len bytes without ending inside a
 * multi-byte character.
 *
 * If the byte at the cut belongs to a multi-byte character that does not fit
 * into $len bytes, the whole character is dropped.
 *
 * @param	string		UTF-8 string
 * @param	integer		Maximum length in bytes
 * @return	string		The truncated string; '' if $len is zero or negative
 */
function utf8_strtrunc($str,$len) {
	$len = intval($len);
	if ($len <= 0) return '';
	if ($len > strlen($str)) return $str;	// nothing to cut

	$i = $len-1;
	if (ord($str[$i]) & 0x80) {	// last kept byte is part of a multi-byte sequence
		// Walk back over continuation bytes (10xxxxxx) to the lead byte.
		while ($i > 0 && (ord($str[$i]) & 0xC0) == 0x80) {
			$i--;
		}
		$lead = ord($str[$i]);

		// Still a continuation byte at index 0: the string starts in the
		// middle of a sequence and nothing complete can be kept.
		if (($lead & 0xC0) == 0x80) return '';

		if (($lead & 0xC0) == 0xC0) {
			// The number of leading 1-bits is the byte length of the character.
			for ($bc=0; $lead & 0x80; $lead = $lead << 1) {
				$bc++;
			}
			// The original returned '' whenever the lead byte sat at index 0,
			// even if the character fitted; only drop it when it does not fit.
			if ($i+$bc > $len) return substr($str,0,$i);
		}
	}

	return substr($str,0,$len);
}
01687
/**
 * Finds the first occurrence of a string in a UTF-8 string, in characters.
 *
 * Uses mb_strpos() when $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is set to
 * 'mbstring'; otherwise searches by bytes and converts the positions with
 * utf8_char2byte_pos() and utf8_byte2char_pos().
 *
 * @param	string		UTF-8 string to search in
 * @param	string		UTF-8 string to search for
 * @param	integer		Character position to start the search from
 * @return	integer		Character position of the match, or false if there is no match or the offset cannot be resolved
 */
function utf8_strpos($haystack,$needle,$offset=0) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		// mb_strpos() takes the offset third and the encoding fourth. The
		// original passed 'utf-8' as the offset and ignored $offset.
		return mb_strpos($haystack,$needle,$offset,'utf-8');
	}

	$byte_offset = $this->utf8_char2byte_pos($haystack,$offset);
	if ($byte_offset === false) return false;	// offset beyond string length

	$byte_pos = strpos($haystack,$needle,$byte_offset);
	if ($byte_pos === false) return false;	// needle not found

	return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01711
/**
 * Finds the last occurrence of a string in a UTF-8 string, in characters.
 *
 * Uses mb_strrpos() when $TYPO3_CONF_VARS['SYS']['t3lib_cs_utils'] is set to
 * 'mbstring'; otherwise searches by bytes and converts the position with
 * utf8_byte2char_pos().
 *
 * @param	string		UTF-8 string to search in
 * @param	string		UTF-8 string to search for
 * @return	integer		Character position of the match, or false if there is none
 */
function utf8_strrpos($haystack,$needle) {
	if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
		// Before PHP 5.2 the third argument of mb_strrpos() was the encoding.
		// 5.2 inserted an offset in front of it, and PHP 8 no longer accepts
		// the old three-argument form.
		if (version_compare(PHP_VERSION,'5.2.0','<')) {
			return mb_strrpos($haystack,$needle,'utf-8');
		}
		return mb_strrpos($haystack,$needle,0,'utf-8');
	}

	$byte_pos = strrpos($haystack,$needle);
	if ($byte_pos === false) return false;	// needle not found

	return $this->utf8_byte2char_pos($haystack,$byte_pos);
}
01731
/**
 * Translates a character position in a UTF-8 string into a byte position.
 *
 * For $pos >= 0 the string is scanned from the start; the result is the byte
 * index just behind the first $pos characters. For $pos < 0 it is scanned
 * from the end; the result is the byte index at which the last abs($pos)
 * characters begin. false is returned when the scan runs off either end of
 * the string before a position inside it is reached.
 *
 * @param	string		UTF-8 string
 * @param	integer		Character position; negative counts from the end
 * @return	integer		Byte position, or false if the position cannot be resolved inside the string
 */
function utf8_char2byte_pos($str,$pos) {
	$length = strlen($str);
	$wanted = abs($pos);	// number of characters to pass
	$found = 0;		// number of characters passed so far

	if ($pos >= 0) {
		$i = 0;
		$step = 1;
	} else {
		$i = $length-1;
		$step = -1;
	}

	// Explicit bounds on both sides: since PHP 7.1 a negative string offset
	// reads from the end instead of being empty, so an out-of-range read can
	// no longer serve as the end marker for the backward scan.
	for ( ; $i >= 0 && $i < $length && $found < $wanted; $i += $step) {
		// Every byte that is not a continuation byte (10xxxxxx) starts a character.
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$found++;
		}
	}
	if ($i < 0 || $i >= $length) return false;	// offset beyond string length

	if ($pos >= 0) {
		// Skip the continuation bytes of the character just passed.
		while ($i < $length && (ord($str[$i]) & 0xC0) == 0x80) {
			$i++;
		}
	} else {
		// The scan stopped one byte in front of the wanted start byte.
		$i++;
	}

	return $i;
}
01772
/**
 * Translates a byte position in a UTF-8 string into a character position.
 *
 * Counts the character start bytes from $pos back to byte 1. Byte 0 is not
 * inspected, so the count equals the character index when $pos points at the
 * first byte of a character.
 *
 * @param	string		UTF-8 string
 * @param	integer		Byte position
 * @return	integer		Character position, or false if $pos lies outside the string
 */
function utf8_byte2char_pos($str,$pos) {
	$pos = intval($pos);

	// Range check before the scan. The original tested the offset after its
	// loop, when the index was already 0, so it only detected an empty string
	// and counted nonexistent bytes for a $pos past the end.
	if ($pos < 0 || $pos >= strlen($str)) return false;

	$n = 0;
	for ($i=$pos; $i>0; $i--) {
		// Every byte that is not a continuation byte (10xxxxxx) starts a character.
		if ((ord($str[$i]) & 0xC0) != 0x80) {
			$n++;
		}
	}

	return $n;
}
01795
/**
 * Maps every character of a UTF-8 string through a lookup table.
 *
 * The table is $this->caseFolding['utf-8'][$opt] in 'case' mode and
 * $this->toASCII['utf-8'] in 'ascii' mode. Characters without an entry are
 * copied unchanged. The input is returned untouched if initUnicodeData()
 * returns a false value or the mode is unknown.
 *
 * @param	string		UTF-8 string
 * @param	string		Mapping mode: 'case' or 'ascii'
 * @param	string		Sub-key into the case folding table; only used in 'case' mode
 * @return	string		The converted string
 */
function utf8_char_mapping($str,$mode,$opt='') {
	if (!$this->initUnicodeData($mode)) return $str;

	switch($mode) {
		case 'case':
			$map =& $this->caseFolding['utf-8'][$opt];
		break;

		case 'ascii':
			$map =& $this->toASCII['utf-8'];
		break;

		default:
			return $str;
	}

	$out = '';
	$length = strlen($str);
	for ($i=0; $i<$length; $i++) {
		$c = ord($str[$i]);
		if (($c & 0xC0) == 0xC0) {
			// Lead byte (11xxxxxx): the number of leading 1-bits is the byte
			// length of the character.
			for ($bc=0; $c & 0x80; $c = $c << 1) {
				$bc++;
			}
			$mbc = substr($str,$i,$bc);
			$i += $bc-1;
		} else {
			// ASCII byte, or a stray continuation byte in malformed input.
			// The original left $mbc untouched for a stray byte and so
			// emitted the previous character a second time.
			$mbc = $str[$i];
		}

		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
01841
01842
01843
01844
01845
01846
01847
01848
01849
01850
01851
01852
01853
01854
01855
01856
01857
01858
01859
01860
01861
01862
01863
01864
01865
01866
01867
01868
01869
01870
/**
 * Truncates a string in an EUC-style double-byte charset to at most $len
 * bytes without splitting a double-byte character.
 *
 * For 'shift_jis' a byte in 0x80-0x9F or 0xE0-0xFF starts a double-byte
 * character; for all other charsets any byte >= 0x80 does.
 *
 * @param	string		String to truncate
 * @param	integer		Maximum length in bytes
 * @param	string		Charset of $str
 * @return	string		The truncated string
 */
function euc_strtrunc($str,$len,$charset) {
	$len = intval($len);
	if ($len <= 0) return '';

	$length = strlen($str);
	if ($length <= $len) return $str;	// string not longer than supplied length

	$sjis = ($charset == 'shift_jis');

	// Walk character by character until the next boundary is at or past $len.
	for ($i=0; $i<$length && $i<$len; $i++) {
		$c = ord($str[$i]);
		if ($sjis) {
			if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++;	// advance a double-byte char
		} elseif ($c >= 0x80) {
			$i++;	// advance a double-byte char
		}
	}

	// A boundary past $len means the last character straddles the limit and
	// has to go. The original returned the whole string when that character
	// was also the last one in the string.
	if ($i > $len) {
		return substr($str,0,$len-1);
	}
	return substr($str,0,$len);
}
01899
/**
 * Returns part of a string in an EUC-style double-byte charset, with start
 * and length counted in characters.
 *
 * Character positions are translated to byte positions with
 * euc_char2byte_pos(); the actual cutting is done with substr().
 *
 * @param	string		String to cut
 * @param	integer		Start position in characters; negative counts from the end
 * @param	string		Charset of $str
 * @param	integer		Length in characters; negative leaves that many characters off the end; null means "to the end"
 * @return	string		The substring, or false if $start cannot be resolved inside the string
 */
function euc_substr($str,$start,$charset,$len=null) {
	// A length of 0 or '0' yields an empty string, as in utf8_substr().
	// Without this the loose null comparison below treated 0 like "no length".
	if ($len !== null && !strcmp($len,'0')) return '';

	$byte_start = $this->euc_char2byte_pos($str,$start,$charset);
	if ($byte_start === false) return false;	// $start outside string length

	$str = substr($str,$byte_start);

	if ($len != null) {
		$byte_end = $this->euc_char2byte_pos($str,$len,$charset);
		if ($byte_end === false) {
			// $len reaches outside the remaining string: nothing is left for
			// a negative length, everything for a positive one.
			return $len < 0 ? '' : $str;
		}
		return substr($str,0,$byte_end);
	}

	return $str;
}
01925
/**
 * Counts the characters of a string in an EUC-style double-byte charset.
 *
 * For 'shift_jis' a byte in 0x80-0x9F or 0xE0-0xFF starts a double-byte
 * character; for all other charsets any byte >= 0x80 does.
 *
 * @param	string		String to measure
 * @param	string		Charset of $str
 * @return	integer		Number of characters
 */
function euc_strlen($str,$charset) {
	$sjis = ($charset == 'shift_jis');
	$length = strlen($str);
	$n = 0;

	// Bounded loop with [] offsets: the {} offset syntax is gone in PHP 8, and
	// the loop no longer reads one byte past the end to find out where to stop.
	for ($i=0; $i<$length; $i++) {
		$c = ord($str[$i]);
		if ($sjis) {
			$isDouble = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
		} else {
			$isDouble = ($c >= 0x80);
		}
		if ($isDouble) $i++;	// skip the second byte of a double-byte char

		$n++;
	}

	return $n;
}
01952
/**
 * Translates a character position into a byte position for a string in an
 * EUC-style double-byte charset.
 *
 * For $pos >= 0 the string is scanned from the start; the result is the byte
 * index just behind the first $pos characters. For $pos < 0 it is scanned
 * from the end; the result is the byte index at which the last abs($pos)
 * characters begin. false is returned when the scan runs off either end of
 * the string before a position inside it is reached.
 *
 * NOTE: the backward scan applies the double-byte test to whichever byte it
 * meets first, which is the second byte of a double-byte character.
 *
 * @param	string		String to scan
 * @param	integer		Character position; negative counts from the end
 * @param	string		Charset of $str
 * @return	integer		Byte position, or false if the position cannot be resolved inside the string
 */
function euc_char2byte_pos($str,$pos,$charset) {
	$sjis = ($charset == 'shift_jis');
	$length = strlen($str);
	$wanted = abs($pos);	// number of characters to pass
	$found = 0;		// number of characters passed so far

	if ($pos >= 0) {
		$i = 0;
		$step = 1;
	} else {
		$i = $length-1;
		$step = -1;
	}

	// Explicit bounds on both sides: since PHP 7.1 a negative string offset
	// reads from the end instead of being empty, so an out-of-range read can
	// no longer serve as the end marker for the backward scan.
	while ($i >= 0 && $i < $length && $found < $wanted) {
		$c = ord($str[$i]);
		if ($sjis) {
			$isDouble = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
		} else {
			$isDouble = ($c >= 0x80);
		}

		$i += $isDouble ? 2*$step : $step;
		$found++;
	}
	if ($i < 0 || $i >= $length) return false;	// offset beyond string length

	if ($pos < 0) $i++;	// the scan stopped one byte in front of the wanted start

	return $i;
}
01992
/**
 * Maps every character of a string in an EUC-style double-byte charset
 * through a lookup table.
 *
 * The table is $this->caseFolding[$charset][$opt] in 'case' mode and
 * $this->toASCII[$charset] in 'ascii' mode. Characters without an entry are
 * copied unchanged. The input is returned untouched if the mode is unknown or
 * the matching init method returns a false value.
 *
 * @param	string		String to convert
 * @param	string		Charset of $str
 * @param	string		Mapping mode: 'case' or 'ascii'
 * @param	string		Sub-key into the case folding table; only used in 'case' mode
 * @return	string		The converted string
 */
function euc_char_mapping($str,$charset,$mode,$opt='') {
	switch($mode) {
		case 'case':
			if (!$this->initCaseFolding($charset)) return $str;
			$map =& $this->caseFolding[$charset][$opt];
		break;

		case 'ascii':
			if (!$this->initToASCII($charset)) return $str;
			$map =& $this->toASCII[$charset];
		break;

		default:
			return $str;
	}

	$sjis = ($charset == 'shift_jis');
	$out = '';
	$length = strlen($str);

	// Bounded loop with [] offsets: the {} offset syntax is gone in PHP 8, and
	// the loop no longer reads one byte past the end to find out where to stop.
	for ($i=0; $i<$length; $i++) {
		$mbc = $str[$i];
		$c = ord($mbc);

		if ($sjis) {
			$isDouble = (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0));
		} else {
			$isDouble = ($c >= 0x80);
		}
		if ($isDouble) {
			// Take both bytes as one character and skip the second one.
			$mbc = substr($str,$i,2);
			$i++;
		}

		$out .= isset($map[$mbc]) ? $map[$mbc] : $mbc;
	}

	return $out;
}
02047
02048 }
02049
// Extension hook: when running inside TYPO3 and an XCLASS file is registered
// for this class in $TYPO3_CONF_VARS, include it. !empty() keeps the check
// quiet when no such entry exists instead of raising an undefined-index notice.
if (defined('TYPO3_MODE') && !empty($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php'])) {
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']);
}
02053 ?>