<?php
  // Pull in the shared helpers for the generated documentation pages
  // (get_header() is presumably defined there -- it is not declared in this file).
  include_once '../doc-typo3-funcs.php';

  // Page metadata recorded by Doxygen for this source listing.
  // Built key by key, in the same order the header helper has always received it.
  $doxygen_vars = array();
  $doxygen_vars['title']          = 'TYPO3 4.0.1: typo3_src-4.0.1/t3lib/class.t3lib_cs.php Source File';
  $doxygen_vars['datetime']       = 'Sat Dec 2 19:22:17 2006';
  $doxygen_vars['date']           = '2 Dec 2006';
  $doxygen_vars['doxygenversion'] = '1.4.6';
  $doxygen_vars['projectname']    = 'TYPO3 4.0.1';
  $doxygen_vars['projectnumber']  = '4.0.1';

  // Hand the metadata to the shared header helper before the static HTML below is sent.
  get_header($doxygen_vars);
?>
<!-- Generated by Doxygen 1.4.6 -->
<div class="tabs">
  <ul>
    <li><a href="main.html"><span>Main&nbsp;Page</span></a></li>
    <li><a href="namespaces.html"><span>Namespaces</span></a></li>
    <li><a href="classes.html"><span>Classes</span></a></li>
    <li id="current"><a href="files.html"><span>Files</span></a></li>
    <li><a href="dirs.html"><span>Directories</span></a></li>
    <li><a href="pages.html"><span>Related&nbsp;Pages</span></a></li>
    <li><a href="examples.html"><span>Examples</span></a></li>
    <li>
      <form action="search.php" method="get">
        <table cellspacing="0" cellpadding="0" border="0">
          <tr>
            <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
            <td><input type="text" name="query" value="" size="20" accesskey="s"/></td>
          </tr>
        </table>
      </form>
    </li>
  </ul></div>
<div class="nav">
<a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_9d0e5c424a38b69aeeedc616a9634e5f.html">t3lib</a></div>
<h1>class.t3lib_cs.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 &lt;?php
<a name="l00002"></a>00002 <span class="comment">/***************************************************************</span>
<a name="l00003"></a>00003 <span class="comment">*  Copyright notice</span>
<a name="l00004"></a>00004 <span class="comment">*</span>
<a name="l00005"></a>00005 <span class="comment">*  (c) 2003-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)</span>
<a name="l00006"></a>00006 <span class="comment">*  All rights reserved</span>
<a name="l00007"></a>00007 <span class="comment">*</span>
<a name="l00008"></a>00008 <span class="comment">*  This script is part of the Typo3 project. The Typo3 project is</span>
<a name="l00009"></a>00009 <span class="comment">*  free software; you can redistribute it and/or modify</span>
<a name="l00010"></a>00010 <span class="comment">*  it under the terms of the GNU General Public License as published by</span>
<a name="l00011"></a>00011 <span class="comment">*  the Free Software Foundation; either version 2 of the License, or</span>
<a name="l00012"></a>00012 <span class="comment">*  (at your option) any later version.</span>
<a name="l00013"></a>00013 <span class="comment">*</span>
<a name="l00014"></a>00014 <span class="comment">*  The GNU General Public License can be found at</span>
<a name="l00015"></a>00015 <span class="comment">*  http://www.gnu.org/copyleft/gpl.html.</span>
<a name="l00016"></a>00016 <span class="comment">*</span>
<a name="l00017"></a>00017 <span class="comment">*  This script is distributed in the hope that it will be useful,</span>
<a name="l00018"></a>00018 <span class="comment">*  but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<a name="l00019"></a>00019 <span class="comment">*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</span>
<a name="l00020"></a>00020 <span class="comment">*  GNU General Public License for more details.</span>
<a name="l00021"></a>00021 <span class="comment">*</span>
<a name="l00022"></a>00022 <span class="comment">*  This copyright notice MUST APPEAR in all copies of the script!</span>
<a name="l00023"></a>00023 <span class="comment">***************************************************************/</span>
<a name="l00136"></a><a class="code" href="classt3lib__cs.html">00136</a> <span class="keyword">class </span><a class="code" href="classt3lib__cs.html">t3lib_cs</a> {
<a name="l00137"></a><a class="code" href="classt3lib__cs.html#9ab316dd87b5b365f857eb85f4a0a9df">00137</a>         var <a class="code" href="classt3lib__cs.html#9ab316dd87b5b365f857eb85f4a0a9df">$noCharByteVal</a>=63;          <span class="comment">// ASCII Value for chars with no equivalent.</span>
<a name="l00138"></a>00138 
<a name="l00139"></a>00139                 <span class="comment">// This is the array where parsed conversion tables are stored (cached)</span>
<a name="l00140"></a><a class="code" href="classt3lib__cs.html#4ebc1d87207362e257e0faf256f8d0b6">00140</a>         var <a class="code" href="classt3lib__cs.html#4ebc1d87207362e257e0faf256f8d0b6">$parsedCharsets</a>=array();
<a name="l00141"></a>00141 
<a name="l00142"></a>00142                 <span class="comment">// An array where case folding data will be stored (cached)</span>
<a name="l00143"></a><a class="code" href="classt3lib__cs.html#9912264b38a9ea1f6bd8165e9990b9ce">00143</a>         var <a class="code" href="classt3lib__cs.html#9912264b38a9ea1f6bd8165e9990b9ce">$caseFolding</a>=array();
<a name="l00144"></a>00144 
<a name="l00145"></a>00145                 <span class="comment">// An array where charset-to-ASCII mappings are stored (cached)</span>
<a name="l00146"></a><a class="code" href="classt3lib__cs.html#cbc9aa09194a1f907d36d85912802a45">00146</a>         var <a class="code" href="classt3lib__cs.html#cbc9aa09194a1f907d36d85912802a45">$toASCII</a>=array();
<a name="l00147"></a>00147 
<a name="l00148"></a>00148                 <span class="comment">// This tells the converter which charsets has two bytes per char:</span>
<a name="l00149"></a><a class="code" href="classt3lib__cs.html#6f5a9f0242a1c7c7e6d2606ded10a2e6">00149</a>         var <a class="code" href="classt3lib__cs.html#6f5a9f0242a1c7c7e6d2606ded10a2e6">$twoByteSets</a>=array(
<a name="l00150"></a>00150                 'ucs-2'=&gt;1,     <span class="comment">// 2-byte Unicode</span>
<a name="l00151"></a>00151         );
<a name="l00152"></a>00152 
<a name="l00153"></a>00153                 <span class="comment">// This tells the converter which charsets has four bytes per char:</span>
<a name="l00154"></a><a class="code" href="classt3lib__cs.html#f03f4dc2397537781d7bd003b172fed6">00154</a>         var <a class="code" href="classt3lib__cs.html#f03f4dc2397537781d7bd003b172fed6">$fourByteSets</a>=array(
<a name="l00155"></a>00155                 'ucs-4'=&gt;1,     <span class="comment">// 4-byte Unicode</span>
<a name="l00156"></a>00156                 'utf-32'=&gt;1,    <span class="comment">// 4-byte Unicode (limited to the 21-bits of UTF-16)</span>
<a name="l00157"></a>00157         );
<a name="l00158"></a>00158 
<a name="l00159"></a>00159                 <span class="comment">// This tells the converter which charsets use a scheme like the Extended Unix Code:</span>
<a name="l00160"></a><a class="code" href="classt3lib__cs.html#7de65ab48a3a93ebaf8ee18b47e49287">00160</a>         var <a class="code" href="classt3lib__cs.html#7de65ab48a3a93ebaf8ee18b47e49287">$eucBasedSets</a>=array(
<a name="l00161"></a>00161                 'gb2312'=&gt;1,            <span class="comment">// Chinese, simplified.</span>
<a name="l00162"></a>00162                 'big5'=&gt;1,              <span class="comment">// Chinese, traditional.</span>
<a name="l00163"></a>00163                 'euc-kr'=&gt;1,            <span class="comment">// Korean</span>
<a name="l00164"></a>00164                 'shift_jis'=&gt;1,         <span class="comment">// Japanese - WARNING: Shift-JIS includes half-width katakana single-bytes characters above 0x80!</span>
<a name="l00165"></a>00165         );
<a name="l00166"></a>00166 
<a name="l00167"></a>00167                 <span class="comment">// see  http://developer.apple.com/documentation/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.b0.html</span>
<a name="l00168"></a>00168                 <span class="comment">// http://czyborra.com/charsets/iso8859.html</span>
<a name="l00169"></a><a class="code" href="classt3lib__cs.html#9bb5a04852c0887183eff86104014a1f">00169</a>         var <a class="code" href="classt3lib__cs.html#9bb5a04852c0887183eff86104014a1f">$synonyms</a>=array(
<a name="l00170"></a>00170                 'us' =&gt; 'ascii',
<a name="l00171"></a>00171                 'us-ascii'=&gt; 'ascii',
<a name="l00172"></a>00172                 'cp819' =&gt; 'iso-8859-1',
<a name="l00173"></a>00173                 'ibm819' =&gt; 'iso-8859-1',
<a name="l00174"></a>00174                 'iso-ir-100' =&gt; 'iso-8859-1',
<a name="l00175"></a>00175                 'iso-ir-109' =&gt; 'iso-8859-2',
<a name="l00176"></a>00176                 'iso-ir-148' =&gt; 'iso-8859-9',
<a name="l00177"></a>00177                 'iso-ir-199' =&gt; 'iso-8859-14',
<a name="l00178"></a>00178                 'iso-ir-203' =&gt; 'iso-8859-15',
<a name="l00179"></a>00179                 'csisolatin1' =&gt; 'iso-8859-1',
<a name="l00180"></a>00180                 'csisolatin2' =&gt; 'iso-8859-2',
<a name="l00181"></a>00181                 'csisolatin3' =&gt; 'iso-8859-3',
<a name="l00182"></a>00182                 'csisolatin5' =&gt; 'iso-8859-9',
<a name="l00183"></a>00183                 'csisolatin8' =&gt; 'iso-8859-14',
<a name="l00184"></a>00184                 'csisolatin9' =&gt; 'iso-8859-15',
<a name="l00185"></a>00185                 'csisolatingreek' =&gt; 'iso-8859-7',
<a name="l00186"></a>00186                 'iso-celtic' =&gt; 'iso-8859-14',
<a name="l00187"></a>00187                 'latin1' =&gt; 'iso-8859-1',
<a name="l00188"></a>00188                 'latin2' =&gt; 'iso-8859-2',
<a name="l00189"></a>00189                 'latin3' =&gt; 'iso-8859-3',
<a name="l00190"></a>00190                 'latin5' =&gt; 'iso-8859-9',
<a name="l00191"></a>00191                 'latin6' =&gt; 'iso-8859-10',
<a name="l00192"></a>00192                 'latin8' =&gt; 'iso-8859-14',
<a name="l00193"></a>00193                 'latin9' =&gt; 'iso-8859-15',
<a name="l00194"></a>00194                 'l1' =&gt; 'iso-8859-1',
<a name="l00195"></a>00195                 'l2' =&gt; 'iso-8859-2',
<a name="l00196"></a>00196                 'l3' =&gt; 'iso-8859-3',
<a name="l00197"></a>00197                 'l5' =&gt; 'iso-8859-9',
<a name="l00198"></a>00198                 'l6' =&gt; 'iso-8859-10',
<a name="l00199"></a>00199                 'l8' =&gt; 'iso-8859-14',
<a name="l00200"></a>00200                 'l9' =&gt; 'iso-8859-15',
<a name="l00201"></a>00201                 'cyrillic' =&gt; 'iso-8859-5',
<a name="l00202"></a>00202                 'arabic' =&gt; 'iso-8859-6',
<a name="l00203"></a>00203                 'tis-620' =&gt; 'iso-8859-11',
<a name="l00204"></a>00204                 'win874' =&gt; 'windows-874',
<a name="l00205"></a>00205                 'win1250' =&gt; 'windows-1250',
<a name="l00206"></a>00206                 'win1251' =&gt; 'windows-1251',
<a name="l00207"></a>00207                 'win1252' =&gt; 'windows-1252',
<a name="l00208"></a>00208                 'win1253' =&gt; 'windows-1253',
<a name="l00209"></a>00209                 'win1254' =&gt; 'windows-1254',
<a name="l00210"></a>00210                 'win1255' =&gt; 'windows-1255',
<a name="l00211"></a>00211                 'win1256' =&gt; 'windows-1256',
<a name="l00212"></a>00212                 'win1257' =&gt; 'windows-1257',
<a name="l00213"></a>00213                 'win1258' =&gt; 'windows-1258',
<a name="l00214"></a>00214                 'cp1250' =&gt; 'windows-1250',
<a name="l00215"></a>00215                 'cp1251' =&gt; 'windows-1251',
<a name="l00216"></a>00216                 'cp1252' =&gt; 'windows-1252',
<a name="l00217"></a>00217                 'ms-ee' =&gt; 'windows-1250',
<a name="l00218"></a>00218                 'ms-ansi' =&gt; 'windows-1252',
<a name="l00219"></a>00219                 'ms-greek' =&gt; 'windows-1253',
<a name="l00220"></a>00220                 'ms-turk' =&gt; 'windows-1254',
<a name="l00221"></a>00221                 'winbaltrim' =&gt; 'windows-1257',
<a name="l00222"></a>00222                 'koi-8ru' =&gt; 'koi-8r',
<a name="l00223"></a>00223                 'koi8r' =&gt; 'koi-8r',
<a name="l00224"></a>00224                 'cp878' =&gt; 'koi-8r',
<a name="l00225"></a>00225                 'mac' =&gt; 'macroman',
<a name="l00226"></a>00226                 'macintosh' =&gt; 'macroman',
<a name="l00227"></a>00227                 'euc-cn' =&gt; 'gb2312',
<a name="l00228"></a>00228                 'x-euc-cn' =&gt; 'gb2312',
<a name="l00229"></a>00229                 'euccn' =&gt; 'gb2312',
<a name="l00230"></a>00230                 'cp936' =&gt; 'gb2312',
<a name="l00231"></a>00231                 'big-5' =&gt; 'big5',
<a name="l00232"></a>00232                 'cp950' =&gt; 'big5',
<a name="l00233"></a>00233                 'eucjp' =&gt; 'euc-jp',
<a name="l00234"></a>00234                 'sjis' =&gt; 'shift_jis',
<a name="l00235"></a>00235                 'shift-jis' =&gt; 'shift_jis',
<a name="l00236"></a>00236                 'cp932' =&gt; 'shift_jis',
<a name="l00237"></a>00237                 'cp949' =&gt; 'euc-kr',
<a name="l00238"></a>00238                 'utf7' =&gt; 'utf-7',
<a name="l00239"></a>00239                 'utf8' =&gt; 'utf-8',
<a name="l00240"></a>00240                 'utf16' =&gt; 'utf-16',
<a name="l00241"></a>00241                 'utf32' =&gt; 'utf-32',
<a name="l00242"></a>00242                 'utf8' =&gt; 'utf-8',
<a name="l00243"></a>00243                 'ucs2' =&gt; 'ucs-2',
<a name="l00244"></a>00244                 'ucs4' =&gt; 'ucs-4',
<a name="l00245"></a>00245         );
<a name="l00246"></a>00246 
<a name="l00247"></a>00247                 <span class="comment">// mapping of iso-639:2 language codes to script names</span>
<a name="l00248"></a><a class="code" href="classt3lib__cs.html#21fc1d00a7274bdd25e7bbcbf2817c3b">00248</a>         var <a class="code" href="classt3lib__cs.html#21fc1d00a7274bdd25e7bbcbf2817c3b">$lang_to_script</a>=array(
<a name="l00249"></a>00249                         <span class="comment">// iso-639:2 language codes, see:</span>
<a name="l00250"></a>00250                         <span class="comment">//  http://www.w3.org/WAI/ER/IG/ert/iso639.htm</span>
<a name="l00251"></a>00251                         <span class="comment">//  http://www.loc.gov/standards/iso639-2/langcodes.html</span>
<a name="l00252"></a>00252                         <span class="comment">//  http://www.unicode.org/onlinedat/languages.html</span>
<a name="l00253"></a>00253                 'ar' =&gt; 'arabic',
<a name="l00254"></a>00254                 'bg' =&gt; 'cyrillic',             <span class="comment">// Bulgarian</span>
<a name="l00255"></a>00255                 'bs' =&gt; 'east_european',        <span class="comment">// Bosnian</span>
<a name="l00256"></a>00256                 'cs' =&gt; 'east_european',        <span class="comment">// Czech</span>
<a name="l00257"></a>00257                 'da' =&gt; 'west_european',        <span class="comment">// Danish</span>
<a name="l00258"></a>00258                 'de' =&gt; 'west_european',        <span class="comment">// German</span>
<a name="l00259"></a>00259                 'es' =&gt; 'west_european',        <span class="comment">// Spanish</span>
<a name="l00260"></a>00260                 'et' =&gt; 'estonian',
<a name="l00261"></a>00261                 'eo' =&gt; 'unicode',              <span class="comment">// Esperanto</span>
<a name="l00262"></a>00262                 'eu' =&gt; 'west_european',        <span class="comment">// Basque</span>
<a name="l00263"></a>00263                 'fa' =&gt; 'arabic',       <span class="comment">// Persian</span>
<a name="l00264"></a>00264                 'fi' =&gt; 'west_european',        <span class="comment">// Finnish</span>
<a name="l00265"></a>00265                 'fo' =&gt; 'west_european',        <span class="comment">// Faroese</span>
<a name="l00266"></a>00266                 'fr' =&gt; 'west_european',        <span class="comment">// French</span>
<a name="l00267"></a>00267                 'gr' =&gt; 'greek',
<a name="l00268"></a>00268                 'he' =&gt; 'hebrew',               <span class="comment">// Hebrew (since 1998)</span>
<a name="l00269"></a>00269                 'hi' =&gt; 'unicode',              <span class="comment">// Hindi</span>
<a name="l00270"></a>00270                 'hr' =&gt; 'east_european',        <span class="comment">// Croatian</span>
<a name="l00271"></a>00271                 'hu' =&gt; 'east_european',        <span class="comment">// Hungarian</span>
<a name="l00272"></a>00272                 'iw' =&gt; 'hebrew',               <span class="comment">// Hebrew (til 1998)</span>
<a name="l00273"></a>00273                 'is' =&gt; 'west_european',        <span class="comment">// Icelandic</span>
<a name="l00274"></a>00274                 'it' =&gt; 'west_european',        <span class="comment">// Italian</span>
<a name="l00275"></a>00275                 'ja' =&gt; 'japanese',
<a name="l00276"></a>00276                 'kl' =&gt; 'west_european',        <span class="comment">// Greenlandic</span>
<a name="l00277"></a>00277                 'ko' =&gt; 'korean',
<a name="l00278"></a>00278                 'lt' =&gt; 'lithuanian',
<a name="l00279"></a>00279                 'lv' =&gt; 'west_european',        <span class="comment">// Latvian/Lettish</span>
<a name="l00280"></a>00280                 'nl' =&gt; 'west_european',        <span class="comment">// Dutch</span>
<a name="l00281"></a>00281                 'no' =&gt; 'west_european',        <span class="comment">// Norwegian</span>
<a name="l00282"></a>00282                 'pl' =&gt; 'east_european',        <span class="comment">// Polish</span>
<a name="l00283"></a>00283                 'pt' =&gt; 'west_european',        <span class="comment">// Portuguese</span>
<a name="l00284"></a>00284                 'ro' =&gt; 'east_european',        <span class="comment">// Romanian</span>
<a name="l00285"></a>00285                 'ru' =&gt; 'cyrillic',             <span class="comment">// Russian</span>
<a name="l00286"></a>00286                 'sk' =&gt; 'east_european',        <span class="comment">// Slovak</span>
<a name="l00287"></a>00287                 'sl' =&gt; 'east_european',        <span class="comment">// Slovenian</span>
<a name="l00288"></a>00288                 'sr' =&gt; 'cyrillic',             <span class="comment">// Serbian</span>
<a name="l00289"></a>00289                 'sv' =&gt; 'west_european',        <span class="comment">// Swedish</span>
<a name="l00290"></a>00290                 'th' =&gt; 'thai',
<a name="l00291"></a>00291                 'uk' =&gt; 'cyrillic',             <span class="comment">// Ukrainian</span>
<a name="l00292"></a>00292                 'vi' =&gt; 'vietnamese',
<a name="l00293"></a>00293                 'zh' =&gt; 'chinese',
<a name="l00294"></a>00294                         <span class="comment">// MS language codes, see http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclib/html/_crt_language_strings.asp</span>
<a name="l00295"></a>00295                         <span class="comment">// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/wceinternational5/html/wce50conLanguageIdentifiersandLocales.asp</span>
<a name="l00296"></a>00296                 'ara' =&gt; 'arabic',
<a name="l00297"></a>00297                 'bgr' =&gt; 'cyrillic',            <span class="comment">// Bulgarian</span>
<a name="l00298"></a>00298                 'cat' =&gt; 'west_european',       <span class="comment">// Catalan</span>
<a name="l00299"></a>00299                 'chs' =&gt; 'simpl_chinese',
<a name="l00300"></a>00300                 'cht' =&gt; 'trad_chinese',
<a name="l00301"></a>00301                 'csy' =&gt; 'east_european',       <span class="comment">// Czech</span>
<a name="l00302"></a>00302                 'dan' =&gt; 'west_european',       <span class="comment">// Danish</span>
<a name="l00303"></a>00303                 'deu' =&gt; 'west_european',       <span class="comment">// German</span>
<a name="l00304"></a>00304                 'dea' =&gt; 'west_european',       <span class="comment">// German (Austrian)</span>
<a name="l00305"></a>00305                 'des' =&gt; 'west_european',       <span class="comment">// German (Swiss)</span>
<a name="l00306"></a>00306                 'ena' =&gt; 'west_european',       <span class="comment">// English (Australian)</span>
<a name="l00307"></a>00307                 'enc' =&gt; 'west_european',       <span class="comment">// English (Canadian)</span>
<a name="l00308"></a>00308                 'eng' =&gt; 'west_european',       <span class="comment">// English</span>
<a name="l00309"></a>00309                 'enz' =&gt; 'west_european',       <span class="comment">// English (New Zealand)</span>
<a name="l00310"></a>00310                 'enu' =&gt; 'west_european',       <span class="comment">// English (United States)</span>
<a name="l00311"></a>00311                 'euq' =&gt; 'west_european',       <span class="comment">// Basque</span>
<a name="l00312"></a>00312                 'fos' =&gt; 'west_european',       <span class="comment">// Faroese</span>
<a name="l00313"></a>00313                 'far' =&gt; 'arabic',      <span class="comment">// Persian</span>
<a name="l00314"></a>00314                 'fin' =&gt; 'west_european',       <span class="comment">// Finnish</span>
<a name="l00315"></a>00315                 'fra' =&gt; 'west_european',       <span class="comment">// French</span>
<a name="l00316"></a>00316                 'frb' =&gt; 'west_european',       <span class="comment">// French (Belgian)</span>
<a name="l00317"></a>00317                 'frc' =&gt; 'west_european',       <span class="comment">// French (Canadian)</span>
<a name="l00318"></a>00318                 'frs' =&gt; 'west_european',       <span class="comment">// French (Swiss)</span>
<a name="l00319"></a>00319                 'ell' =&gt; 'greek',
<a name="l00320"></a>00320                 'heb' =&gt; 'hebrew',
<a name="l00321"></a>00321                 'hin' =&gt; 'unicode',     <span class="comment">// Hindi</span>
<a name="l00322"></a>00322                 'hun' =&gt; 'east_european',       <span class="comment">// Hungarian</span>
<a name="l00323"></a>00323                 'isl' =&gt; 'west_euorpean',       <span class="comment">// Icelandic</span>
<a name="l00324"></a>00324                 'ita' =&gt; 'west_european',       <span class="comment">// Italian</span>
<a name="l00325"></a>00325                 'its' =&gt; 'west_european',       <span class="comment">// Italian (Swiss)</span>
<a name="l00326"></a>00326                 'jpn' =&gt; 'japanese',
<a name="l00327"></a>00327                 'kor' =&gt; 'korean',
<a name="l00328"></a>00328                 'lth' =&gt; 'lithuanian',
<a name="l00329"></a>00329                 'lvi' =&gt; 'west_european',       <span class="comment">// Latvian/Lettish</span>
<a name="l00330"></a>00330                 'msl' =&gt; 'west_european',       <span class="comment">// Malay</span>
<a name="l00331"></a>00331                 'nlb' =&gt; 'west_european',       <span class="comment">// Dutch (Belgian)</span>
<a name="l00332"></a>00332                 'nld' =&gt; 'west_european',       <span class="comment">// Dutch</span>
<a name="l00333"></a>00333                 'nor' =&gt; 'west_european',       <span class="comment">// Norwegian (bokmal)</span>
<a name="l00334"></a>00334                 'non' =&gt; 'west_european',       <span class="comment">// Norwegian (nynorsk)</span>
<a name="l00335"></a>00335                 'plk' =&gt; 'east_european',       <span class="comment">// Polish</span>
<a name="l00336"></a>00336                 'ptg' =&gt; 'west_european',       <span class="comment">// Portuguese</span>
<a name="l00337"></a>00337                 'ptb' =&gt; 'west_european',       <span class="comment">// Portuguese (Brazil)</span>
<a name="l00338"></a>00338                 'rom' =&gt; 'east_european',       <span class="comment">// Romanian</span>
<a name="l00339"></a>00339                 'rus' =&gt; 'cyrillic',            <span class="comment">// Russian</span>
<a name="l00340"></a>00340                 'slv' =&gt; 'east_european',       <span class="comment">// Slovenian</span>
<a name="l00341"></a>00341                 'sky' =&gt; 'east_european',       <span class="comment">// Slovak</span>
<a name="l00342"></a>00342                 'srl' =&gt; 'east_european',       <span class="comment">// Serbian (Latin)</span>
<a name="l00343"></a>00343                 'srb' =&gt; 'cyrillic',            <span class="comment">// Serbian (Cyrillic)</span>
<a name="l00344"></a>00344                 'esp' =&gt; 'west_european',       <span class="comment">// Spanish (trad. sort)</span>
<a name="l00345"></a>00345                 'esm' =&gt; 'west_european',       <span class="comment">// Spanish (Mexican)</span>
<a name="l00346"></a>00346                 'esn' =&gt; 'west_european',       <span class="comment">// Spanish (internat. sort)</span>
<a name="l00347"></a>00347                 'sve' =&gt; 'west_european',       <span class="comment">// Swedish</span>
<a name="l00348"></a>00348                 'tha' =&gt; 'thai',
<a name="l00349"></a>00349                 'trk' =&gt; 'turkish',
<a name="l00350"></a>00350                 'ukr' =&gt; 'cyrillic',    <span class="comment">// Ukrainian</span>
<a name="l00351"></a>00351                         <span class="comment">// English language names</span>
<a name="l00352"></a>00352                 'arabic' =&gt; 'arabic',
<a name="l00353"></a>00353                 'basque' =&gt; 'west_european',
<a name="l00354"></a>00354                 'bosnian' =&gt; 'east_european',
<a name="l00355"></a>00355                 'bulgarian' =&gt; 'east_european',
<a name="l00356"></a>00356                 'catalan' =&gt; 'west_european',
<a name="l00357"></a>00357                 'croatian' =&gt; 'east_european',
<a name="l00358"></a>00358                 'czech' =&gt; 'east_european',
<a name="l00359"></a>00359                 'danish' =&gt; 'west_european',
<a name="l00360"></a>00360                 'dutch' =&gt; 'west_european',
<a name="l00361"></a>00361                 'english' =&gt; 'west_european',
<a name="l00362"></a>00362                 'esperanto' =&gt; 'unicode',
<a name="l00363"></a>00363                 'estonian' =&gt; 'estonian',
<a name="l00364"></a>00364                 'faroese' =&gt; 'west_european',
<a name="l00365"></a>00365                 'farsi' =&gt; 'arabic',
<a name="l00366"></a>00366                 'finnish' =&gt; 'west_european',
<a name="l00367"></a>00367                 'french' =&gt; 'west_european',
<a name="l00368"></a>00368                 'galician' =&gt; 'west_european',
<a name="l00369"></a>00369                 'german' =&gt; 'west_european',
<a name="l00370"></a>00370                 'greek' =&gt; 'greek',
<a name="l00371"></a>00371                 'greenlandic' =&gt; 'west_european',
<a name="l00372"></a>00372                 'hebrew' =&gt; 'hebrew',
<a name="l00373"></a>00373                 'hindi' =&gt; 'unicode',
<a name="l00374"></a>00374                 'hungarian' =&gt; 'east_european',
<a name="l00375"></a>00375                 'icelandic' =&gt; 'west_european',
<a name="l00376"></a>00376                 'italian' =&gt; 'west_european',
<a name="l00377"></a>00377                 'latvian' =&gt; 'west_european',
<a name="l00378"></a>00378                 'lettish' =&gt; 'west_european',
<a name="l00379"></a>00379                 'lithuanian' =&gt; 'lithuanian',
<a name="l00380"></a>00380                 'malay' =&gt; 'west_european',
<a name="l00381"></a>00381                 'norwegian' =&gt; 'west_european',
<a name="l00382"></a>00382                 'persian' =&gt; 'arabic',
<a name="l00383"></a>00383                 'polish' =&gt; 'east_european',
<a name="l00384"></a>00384                 'portuguese' =&gt; 'west_european',
<a name="l00385"></a>00385                 'russian' =&gt; 'cyrillic',
<a name="l00386"></a>00386                 'romanian' =&gt; 'east_european',
<a name="l00387"></a>00387                 'serbian' =&gt; 'cyrillic',
<a name="l00388"></a>00388                 'slovak' =&gt; 'east_european',
<a name="l00389"></a>00389                 'slovenian' =&gt; 'east_european',
<a name="l00390"></a>00390                 'spanish' =&gt; 'west_european',
<a name="l00391"></a>00391                 'svedish' =&gt; 'west_european',
<a name="l00392"></a>00392                 'that' =&gt; 'thai',
<a name="l00393"></a>00393                 'turkish' =&gt; 'turkish',
<a name="l00394"></a>00394                 'ukrainian' =&gt; 'cyrillic',
<a name="l00395"></a>00395         );
<a name="l00396"></a>00396 
<a name="l00397"></a>00397                 <span class="comment">// mapping of language (family) names to charsets on Unix</span>
<a name="l00398"></a><a class="code" href="classt3lib__cs.html#c124a372529679f320a01ac2e9643f90">00398</a>         var <a class="code" href="classt3lib__cs.html#c124a372529679f320a01ac2e9643f90">$script_to_charset_unix</a>=array(
<a name="l00399"></a>00399                 'west_european' =&gt; 'iso-8859-1',
<a name="l00400"></a>00400                 'estonian' =&gt; 'iso-8859-1',
<a name="l00401"></a>00401                 'east_european' =&gt; 'iso-8859-2',
<a name="l00402"></a>00402                 'baltic' =&gt; 'iso-8859-4',
<a name="l00403"></a>00403                 'cyrillic' =&gt; 'iso-8859-5',
<a name="l00404"></a>00404                 'arabic' =&gt; 'iso-8859-6',
<a name="l00405"></a>00405                 'greek' =&gt; 'iso-8859-7',
<a name="l00406"></a>00406                 'hebrew' =&gt; 'iso-8859-8',
<a name="l00407"></a>00407                 'turkish' =&gt; 'iso-8859-9',
<a name="l00408"></a>00408                 'thai' =&gt; 'iso-8859-11', <span class="comment">// = TIS-620</span>
<a name="l00409"></a>00409                 'lithuanian' =&gt; 'iso-8859-13',
<a name="l00410"></a>00410                 'chinese' =&gt; 'gb2312', <span class="comment">// = euc-cn</span>
<a name="l00411"></a>00411                 'japanese' =&gt; 'euc-jp',
<a name="l00412"></a>00412                 'korean' =&gt; 'euc-kr',
<a name="l00413"></a>00413                 'simpl_chinese' =&gt; 'gb2312',
<a name="l00414"></a>00414                 'trad_chinese' =&gt; 'big5',
<a name="l00415"></a>00415                 'vietnamese' =&gt; '',
<a name="l00416"></a>00416                 'unicode' =&gt; 'utf-8',
<a name="l00417"></a>00417         );
<a name="l00418"></a>00418 
<a name="l00419"></a>00419                 <span class="comment">// mapping of language (family) names to charsets on Windows</span>
<a name="l00420"></a><a class="code" href="classt3lib__cs.html#a08d813e271dd1e3546cc2632d229aff">00420</a>         var <a class="code" href="classt3lib__cs.html#a08d813e271dd1e3546cc2632d229aff">$script_to_charset_windows</a>=array(
<a name="l00421"></a>00421                 'east_european' =&gt; 'windows-1250',
<a name="l00422"></a>00422                 'cyrillic' =&gt; 'windows-1251',
<a name="l00423"></a>00423                 'west_european' =&gt; 'windows-1252',
<a name="l00424"></a>00424                 'greek' =&gt; 'windows-1253',
<a name="l00425"></a>00425                 'turkish' =&gt; 'windows-1254',
<a name="l00426"></a>00426                 'hebrew' =&gt; 'windows-1255',
<a name="l00427"></a>00427                 'arabic' =&gt; 'windows-1256',
<a name="l00428"></a>00428                 'baltic' =&gt; 'windows-1257',
<a name="l00429"></a>00429                 'estonian' =&gt; 'windows-1257',
<a name="l00430"></a>00430                 'lithuanian' =&gt; 'windows-1257',
<a name="l00431"></a>00431                 'vietnamese' =&gt; 'windows-1258',
<a name="l00432"></a>00432                 'thai' =&gt; 'cp874',
<a name="l00433"></a>00433                 'korean' =&gt; 'cp949',
<a name="l00434"></a>00434                 'chinese' =&gt; 'gb2312',
<a name="l00435"></a>00435                 'japanese' =&gt; 'shift_jis',
<a name="l00436"></a>00436                 'simpl_chinese' =&gt; 'gb2312',
<a name="l00437"></a>00437                 'trad_chinese' =&gt; 'big5',
<a name="l00438"></a>00438         );
<a name="l00439"></a>00439 
<a name="l00440"></a>00440                 <span class="comment">// mapping of locale names to charsets</span>
<a name="l00441"></a><a class="code" href="classt3lib__cs.html#144b906c09da3dd2e969405778b4e6c4">00441</a>         var <a class="code" href="classt3lib__cs.html#144b906c09da3dd2e969405778b4e6c4">$locale_to_charset</a>=array(
<a name="l00442"></a>00442                 'japanese.euc' =&gt; 'euc-jp',
<a name="l00443"></a>00443                 'ja_jp.ujis' =&gt; 'euc-jp',
<a name="l00444"></a>00444                 'korean.euc' =&gt; 'euc-kr',
<a name="l00445"></a>00445                 'sr@Latn' =&gt; 'iso-8859-2',
<a name="l00446"></a>00446                 'zh_cn' =&gt; 'gb2312',
<a name="l00447"></a>00447                 'zh_hk' =&gt; 'big5',
<a name="l00448"></a>00448                 'zh_tw' =&gt; 'big5',
<a name="l00449"></a>00449         );
<a name="l00450"></a>00450 
<a name="l00451"></a>00451                 <span class="comment">// TYPO3 specific: Array with the system charsets used for each system language in TYPO3:</span>
<a name="l00452"></a>00452                 <span class="comment">// An empty value means "iso-8859-1"</span>
<a name="l00453"></a><a class="code" href="classt3lib__cs.html#26a016f1c5ea7588cc345351d653e165">00453</a>         var <a class="code" href="classt3lib__cs.html#26a016f1c5ea7588cc345351d653e165">$charSetArray</a> = array(
<a name="l00454"></a>00454                 'dk' =&gt; '',
<a name="l00455"></a>00455                 'de' =&gt; '',
<a name="l00456"></a>00456                 'no' =&gt; '',
<a name="l00457"></a>00457                 'it' =&gt; '',
<a name="l00458"></a>00458                 'fr' =&gt; '',
<a name="l00459"></a>00459                 'es' =&gt; '',
<a name="l00460"></a>00460                 'nl' =&gt; '',
<a name="l00461"></a>00461                 'cz' =&gt; 'windows-1250',
<a name="l00462"></a>00462                 'pl' =&gt; 'iso-8859-2',
<a name="l00463"></a>00463                 'si' =&gt; 'windows-1250',
<a name="l00464"></a>00464                 'fi' =&gt; '',
<a name="l00465"></a>00465                 'tr' =&gt; 'iso-8859-9',
<a name="l00466"></a>00466                 'se' =&gt; '',
<a name="l00467"></a>00467                 'pt' =&gt; '',
<a name="l00468"></a>00468                 'ru' =&gt; 'windows-1251',
<a name="l00469"></a>00469                 'ro' =&gt; 'iso-8859-2',
<a name="l00470"></a>00470                 'ch' =&gt; 'gb2312',
<a name="l00471"></a>00471                 'sk' =&gt; 'windows-1250',
<a name="l00472"></a>00472                 'lt' =&gt; 'windows-1257',
<a name="l00473"></a>00473                 'is' =&gt; 'utf-8',
<a name="l00474"></a>00474                 'hr' =&gt; 'windows-1250',
<a name="l00475"></a>00475                 'hu' =&gt; 'iso-8859-2',
<a name="l00476"></a>00476                 'gl' =&gt; '',
<a name="l00477"></a>00477                 'th' =&gt; 'iso-8859-11',
<a name="l00478"></a>00478                 'gr' =&gt; 'iso-8859-7',
<a name="l00479"></a>00479                 'hk' =&gt; 'big5',
<a name="l00480"></a>00480                 'eu' =&gt; '',
<a name="l00481"></a>00481                 'bg' =&gt; 'windows-1251',
<a name="l00482"></a>00482                 'br' =&gt; '',
<a name="l00483"></a>00483                 'et' =&gt; 'iso-8859-4',
<a name="l00484"></a>00484                 'ar' =&gt; 'iso-8859-6',
<a name="l00485"></a>00485                 'he' =&gt; 'utf-8',
<a name="l00486"></a>00486                 'ua' =&gt; 'windows-1251',
<a name="l00487"></a>00487                 'jp' =&gt; 'shift_jis',
<a name="l00488"></a>00488                 'lv' =&gt; 'utf-8',
<a name="l00489"></a>00489                 'vn' =&gt; 'utf-8',
<a name="l00490"></a>00490                 'ca' =&gt; 'iso-8859-15',
<a name="l00491"></a>00491                 'ba' =&gt; 'iso-8859-2',
<a name="l00492"></a>00492                 'kr' =&gt; 'euc-kr',
<a name="l00493"></a>00493                 'eo' =&gt; 'utf-8',
<a name="l00494"></a>00494                 'my' =&gt; '',
<a name="l00495"></a>00495                 'hi' =&gt; 'utf-8',
<a name="l00496"></a>00496                 'fo' =&gt; 'utf-8',
<a name="l00497"></a>00497                 'fa' =&gt; 'utf-8',
<a name="l00498"></a>00498                 'sr' =&gt; 'utf-8'
<a name="l00499"></a>00499         );
<a name="l00500"></a>00500 
<a name="l00501"></a>00501                 <span class="comment">// TYPO3 specific: Array with the iso names used for each system language in TYPO3:</span>
<a name="l00502"></a>00502                 <span class="comment">// A missing key means: same as the TYPO3 language key</span>
<a name="l00503"></a><a class="code" href="classt3lib__cs.html#60f145ab5597088de7edbf7ce1ae936c">00503</a>         var <a class="code" href="classt3lib__cs.html#60f145ab5597088de7edbf7ce1ae936c">$isoArray</a> = array(
<a name="l00504"></a>00504                 'ba' =&gt; 'bs',
<a name="l00505"></a>00505                 'br' =&gt; 'pt_BR',
<a name="l00506"></a>00506                 'ch' =&gt; 'zh_CN',
<a name="l00507"></a>00507                 'cz' =&gt; 'cs',
<a name="l00508"></a>00508                 'dk' =&gt; 'da',
<a name="l00509"></a>00509                 'si' =&gt; 'sl',
<a name="l00510"></a>00510                 'se' =&gt; 'sv',
<a name="l00511"></a>00511                 'gl' =&gt; 'kl',
<a name="l00512"></a>00512                 'gr' =&gt; 'el',
<a name="l00513"></a>00513                 'hk' =&gt; 'zh_HK',
<a name="l00514"></a>00514                 'kr' =&gt; 'ko',
<a name="l00515"></a>00515                 'ua' =&gt; 'uk',
<a name="l00516"></a>00516                 'jp' =&gt; 'ja',
<a name="l00517"></a>00517                 'vn' =&gt; 'vi',
<a name="l00518"></a>00518         );
<a name="l00519"></a>00519 
<a name="l00527"></a><a class="code" href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">00527</a>         function <a class="code" href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">parse_charset</a>($charset)        {
<a name="l00528"></a>00528                 $charset = trim(strtolower($charset));
<a name="l00529"></a>00529                 <span class="keywordflow">if</span> (isset($this-&gt;synonyms[$charset]))   $charset = $this-&gt;synonyms[$charset];
<a name="l00530"></a>00530 
<a name="l00531"></a>00531                 <span class="keywordflow">return</span> $charset;
<a name="l00532"></a>00532         }
<a name="l00533"></a>00533 
<a name="l00546"></a><a class="code" href="classt3lib__cs.html#50c8e620cc1aee632e29fe8b73ac7613">00546</a>         function <a class="code" href="classt3lib__cs.html#50c8e620cc1aee632e29fe8b73ac7613">get_locale_charset</a>($locale)    {
<a name="l00547"></a>00547                 $locale = strtolower($locale);
<a name="l00548"></a>00548 
<a name="l00549"></a>00549                         <span class="comment">// exact locale specific charset?</span>
<a name="l00550"></a>00550                 <span class="keywordflow">if</span> (isset($this-&gt;locale_to_charset[$locale]))   <span class="keywordflow">return</span> $this-&gt;locale_to_charset[$locale];
<a name="l00551"></a>00551 
<a name="l00552"></a>00552                         <span class="comment">// get modifier</span>
<a name="l00553"></a>00553                 list($locale,$modifier) = explode(<span class="charliteral">'@'</span>,$locale);
<a name="l00554"></a>00554 
<a name="l00555"></a>00555                         <span class="comment">// locale contains charset: use it</span>
<a name="l00556"></a>00556                 list($locale,$charset) = explode(<span class="charliteral">'.'</span>,$locale);
<a name="l00557"></a>00557                 <span class="keywordflow">if</span> ($charset)   <span class="keywordflow">return</span> $this-&gt;<a class="code" href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">parse_charset</a>($charset);
<a name="l00558"></a>00558 
<a name="l00559"></a>00559                         <span class="comment">// modifier is 'euro' (after charset check, because of xx.utf-8@euro)</span>
<a name="l00560"></a>00560                 <span class="keywordflow">if</span> ($modifier == 'euro')        <span class="keywordflow">return</span> 'iso-8859-15';
<a name="l00561"></a>00561 
<a name="l00562"></a>00562                         <span class="comment">// get language</span>
<a name="l00563"></a>00563                 list($language,$country) = explode(<span class="charliteral">'_'</span>,$locale);
<a name="l00564"></a>00564                 <span class="keywordflow">if</span> (isset($this-&gt;lang_to_script[$language]))    $script = $this-&gt;lang_to_script[$language];
<a name="l00565"></a>00565 
<a name="l00566"></a>00566                 <span class="keywordflow">if</span> (TYPO3_OS == 'WIN')  {
<a name="l00567"></a>00567                         $cs = $this-&gt;script_to_charset_windows[$script] ? $this-&gt;script_to_charset_windows[$script] : 'windows-1252';
<a name="l00568"></a>00568                 } <span class="keywordflow">else</span> {
<a name="l00569"></a>00569                         $cs = $this-&gt;script_to_charset_unix[$script] ? $this-&gt;script_to_charset_unix[$script] : 'iso-8859-1';
<a name="l00570"></a>00570                 }
<a name="l00571"></a>00571 
<a name="l00572"></a>00572                 <span class="keywordflow">return</span> $cs;
<a name="l00573"></a>00573         }
<a name="l00574"></a>00574 
<a name="l00575"></a>00575 
<a name="l00576"></a>00576 
<a name="l00577"></a>00577 
<a name="l00578"></a>00578 
<a name="l00579"></a>00579 
<a name="l00580"></a>00580 
<a name="l00581"></a>00581 
<a name="l00582"></a>00582 
<a name="l00583"></a>00583         <span class="comment">/********************************************</span>
<a name="l00584"></a>00584 <span class="comment">         *</span>
<a name="l00585"></a>00585 <span class="comment">         * Charset Conversion functions</span>
<a name="l00586"></a>00586 <span class="comment">         *</span>
<a name="l00587"></a>00587 <span class="comment">         ********************************************/</span>
<a name="l00588"></a>00588 
<a name="l00599"></a><a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">00599</a>         function <a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">conv</a>($str,$fromCS,$toCS,$useEntityForNoChar=0) {
<a name="l00600"></a>00600                 <span class="keywordflow">if</span> ($fromCS==$toCS)     <span class="keywordflow">return</span> $str;
<a name="l00601"></a>00601 
<a name="l00602"></a>00602                         <span class="comment">// PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything</span>
<a name="l00603"></a>00603                 <span class="keywordflow">if</span> ($toCS=='utf-8' || !$useEntityForNoChar)     {
<a name="l00604"></a>00604                         <span class="keywordflow">switch</span>($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod'])       {
<a name="l00605"></a>00605                         <span class="keywordflow">case</span> 'mbstring':
<a name="l00606"></a>00606                                 $conv_str = mb_convert_encoding($str,$toCS,$fromCS);
<a name="l00607"></a>00607                                 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str)        <span class="keywordflow">return</span> $conv_str; <span class="comment">// returns false for unsupported charsets</span>
<a name="l00608"></a>00608                                 <span class="keywordflow">break</span>;
<a name="l00609"></a>00609 
<a name="l00610"></a>00610                         <span class="keywordflow">case</span> 'iconv':
<a name="l00611"></a>00611                                 $conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str);
<a name="l00612"></a>00612                                 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str)        <span class="keywordflow">return</span> $conv_str;
<a name="l00613"></a>00613                                 <span class="keywordflow">break</span>;
<a name="l00614"></a>00614 
<a name="l00615"></a>00615                         <span class="keywordflow">case</span> 'recode':
<a name="l00616"></a>00616                                 $conv_str = recode_string($fromCS.'..'.$toCS,$str);
<a name="l00617"></a>00617                                 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str)        <span class="keywordflow">return</span> $conv_str;
<a name="l00618"></a>00618                                 <span class="keywordflow">break</span>;
<a name="l00619"></a>00619                         }
<a name="l00620"></a>00620                         <span class="comment">// fallback to TYPO3 conversion</span>
<a name="l00621"></a>00621                 }
<a name="l00622"></a>00622 
<a name="l00623"></a>00623                 <span class="keywordflow">if</span> ($fromCS!='utf-8')   $str=$this-&gt;<a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">utf8_encode</a>($str,$fromCS);
<a name="l00624"></a>00624                 <span class="keywordflow">if</span> ($toCS!='utf-8')     $str=$this-&gt;<a class="code" href="classt3lib__cs.html#dcc576daa00767dc9b298ddd7cfac1ba">utf8_decode</a>($str,$toCS,$useEntityForNoChar);
<a name="l00625"></a>00625                 <span class="keywordflow">return</span> $str;
<a name="l00626"></a>00626         }
<a name="l00627"></a>00627 
<a name="l00639"></a><a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">00639</a>         function <a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">convArray</a>(&amp;$array,$fromCS,$toCS,$useEntityForNoChar=0) {
<a name="l00640"></a>00640                 foreach($array as $key =&gt; $value)       {
<a name="l00641"></a>00641                         <span class="keywordflow">if</span> (is_array($array[$key]))     {
<a name="l00642"></a>00642                                 $this-&gt;<a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">convArray</a>($array[$key],$fromCS,$toCS,$useEntityForNoChar);
<a name="l00643"></a>00643                         } <span class="keywordflow">else</span> {
<a name="l00644"></a>00644                                 $array[$key] = $this-&gt;<a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">conv</a>($array[$key],$fromCS,$toCS,$useEntityForNoChar);
<a name="l00645"></a>00645                         }
<a name="l00646"></a>00646                 }
<a name="l00647"></a>00647         }
<a name="l00648"></a>00648 
<a name="l00656"></a><a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">00656</a>         function <a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">utf8_encode</a>($str,$charset)     {
<a name="l00657"></a>00657 
<a name="l00658"></a>00658                 <span class="keywordflow">if</span> ($charset === 'utf-8')       <span class="keywordflow">return</span> $str;
<a name="l00659"></a>00659 
<a name="l00660"></a>00660                         <span class="comment">// Charset is case-insensitive.</span>
<a name="l00661"></a>00661                 <span class="keywordflow">if</span> ($this-&gt;<a class="code" href="classt3lib__cs.html#630dd257265f8f9f0955d21cafee1e56">initCharset</a>($charset))       {       <span class="comment">// Parse conv. table if not already...</span>
<a name="l00662"></a>00662                         $strLen = <a class="code" href="classt3lib__cs.html#ba3dbbe621b02266e154ea0dfa15247a">strlen</a>($str);
<a name="l00663"></a>00663                         $outStr='';
<a name="l00664"></a>00664 
<a name="l00665"></a>00665                         <span class="keywordflow">for</span> ($a=0;$a&lt;$strLen;$a++)      {       <span class="comment">// Traverse each char in string.</span>
<a name="l00666"></a>00666                                 $chr=<a class="code" href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">substr</a>($str,$a,1);
<a name="l00667"></a>00667                                 $ord=ord($chr);
<a name="l00668"></a>00668                                 <span class="keywordflow">if</span> (isset($this-&gt;twoByteSets[$charset]))        {       <span class="comment">// If the charset has two bytes per char</span>
<a name="l00669"></a>00669                                         $ord2 = ord($str{$a+1});
<a name="l00670"></a>00670                                         $ord = $ord&lt;&lt;8 | $ord2; <span class="comment">// assume big endian</span>
<a name="l00671"></a>00671 
<a name="l00672"></a>00672                                         <span class="keywordflow">if</span> (isset($this-&gt;parsedCharsets[$charset]['local'][$ord]))      {       <span class="comment">// If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?)</span>
<a name="l00673"></a>00673                                                 $outStr.=$this-&gt;parsedCharsets[$charset]['local'][$ord];
<a name="l00674"></a>00674                                         } <span class="keywordflow">else</span> $outStr.=chr($this-&gt;noCharByteVal);      <span class="comment">// No char exists</span>
<a name="l00675"></a>00675                                         $a++;
<a name="l00676"></a>00676                                 } elseif ($ord&gt;127)     {       <span class="comment">// If char has value over 127 it's a multibyte char in UTF-8</span>
<a name="l00677"></a>00677                                         <span class="keywordflow">if</span> (isset($this-&gt;eucBasedSets[$charset]))       {       <span class="comment">// EUC uses two-bytes above 127; we get both and advance pointer and make $ord a 16bit int.</span>
<a name="l00678"></a>00678                                                 <span class="keywordflow">if</span> ($charset != 'shift_jis' || ($ord &lt; 0xA0 || $ord &gt; 0xDF))    {       <span class="comment">// Shift-JIS: chars between 160 and 223 are single byte</span>
<a name="l00679"></a>00679                                                         $a++;
<a name="l00680"></a>00680                                                         $ord2=ord(<a class="code" href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">substr</a>($str,$a,1));
<a name="l00681"></a>00681                                                         $ord = $ord*256+$ord2;
<a name="l00682"></a>00682                                                 }
<a name="l00683"></a>00683                                         }
<a name="l00684"></a>00684 
<a name="l00685"></a>00685                                         <span class="keywordflow">if</span> (isset($this-&gt;parsedCharsets[$charset]['local'][$ord]))      {       <span class="comment">// If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?)</span>
<a name="l00686"></a>00686                                                 $outStr.= $this-&gt;parsedCharsets[$charset]['local'][$ord];
<a name="l00687"></a>00687                                         } <span class="keywordflow">else</span> $outStr.= chr($this-&gt;noCharByteVal);     <span class="comment">// No char exists</span>
<a name="l00688"></a>00688                                 } <span class="keywordflow">else</span> $outStr.= $chr;  <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. Transparent</span>
<a name="l00689"></a>00689                         }
<a name="l00690"></a>00690                         <span class="keywordflow">return</span> $outStr;
<a name="l00691"></a>00691                 }
<a name="l00692"></a>00692         }
<a name="l00693"></a>00693 
<a name="l00702"></a><a class="code" href="classt3lib__cs.html#dcc576daa00767dc9b298ddd7cfac1ba">00702</a>         function utf8_decode($str,$charset,$useEntityForNoChar=0)       {
<a name="l00703"></a>00703 
<a name="l00704"></a>00704                         <span class="comment">// Charset is case-insensitive.</span>
<a name="l00705"></a>00705                 <span class="keywordflow">if</span> ($this-&gt;initCharset($charset))       {       <span class="comment">// Parse conv. table if not already...</span>
<a name="l00706"></a>00706                         $strLen = strlen($str);
<a name="l00707"></a>00707                         $outStr='';
<a name="l00708"></a>00708                         $buf='';
<a name="l00709"></a>00709                         <span class="keywordflow">for</span> ($a=0,$i=0;$a&lt;$strLen;$a++,$i++)    {       <span class="comment">// Traverse each char in UTF-8 string.</span>
<a name="l00710"></a>00710                                 $chr=substr($str,$a,1);
<a name="l00711"></a>00711                                 $ord=ord($chr);
<a name="l00712"></a>00712                                 <span class="keywordflow">if</span> ($ord&gt;127)   {       <span class="comment">// This means multibyte! (first byte!)</span>
<a name="l00713"></a>00713                                         <span class="keywordflow">if</span> ($ord &amp; 64)  {       <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span>
<a name="l00714"></a>00714 
<a name="l00715"></a>00715                                                 $buf=$chr;      <span class="comment">// Add first byte</span>
<a name="l00716"></a>00716                                                 <span class="keywordflow">for</span> ($b=0;$b&lt;8;$b++)    {       <span class="comment">// for each byte in multibyte string...</span>
<a name="l00717"></a>00717                                                         $ord = $ord &lt;&lt; 1;       <span class="comment">// Shift it left and ...</span>
<a name="l00718"></a>00718                                                         <span class="keywordflow">if</span> ($ord &amp; 128) {       <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span>
<a name="l00719"></a>00719                                                                 $a++;   <span class="comment">// Increase pointer...</span>
<a name="l00720"></a>00720                                                                 $buf.=substr($str,$a,1);        <span class="comment">// ... and add the next char.</span>
<a name="l00721"></a>00721                                                         } <span class="keywordflow">else</span> <span class="keywordflow">break</span>;
<a name="l00722"></a>00722                                                 }
<a name="l00723"></a>00723 
<a name="l00724"></a>00724                                                 <span class="keywordflow">if</span> (isset($this-&gt;parsedCharsets[$charset]['utf8'][$buf]))       {       <span class="comment">// If the UTF-8 char-sequence is found then...</span>
<a name="l00725"></a>00725                                                         $mByte = $this-&gt;parsedCharsets[$charset]['utf8'][$buf]; <span class="comment">// The local number</span>
<a name="l00726"></a>00726                                                         <span class="keywordflow">if</span> ($mByte&gt;255) {       <span class="comment">// If the local number is greater than 255 we will need to split the byte (16bit word assumed) in two chars.</span>
<a name="l00727"></a>00727                                                                 $outStr.= chr(($mByte &gt;&gt; 8) &amp; 255).chr($mByte &amp; 255);
<a name="l00728"></a>00728                                                         } <span class="keywordflow">else</span> $outStr.= chr($mByte);
<a name="l00729"></a>00729                                                 } elseif ($useEntityForNoChar) {        <span class="comment">// Create num entity:</span>
<a name="l00730"></a>00730                                                         $outStr.='&amp;#'.$this-&gt;utf8CharToUnumber($buf,1).<span class="charliteral">';'</span>;
<a name="l00731"></a>00731                                                 } <span class="keywordflow">else</span> $outStr.=chr($this-&gt;noCharByteVal);      <span class="comment">// No char exists</span>
<a name="l00732"></a>00732                                         } <span class="keywordflow">else</span> $outStr.=chr($this-&gt;noCharByteVal);      <span class="comment">// No char exists (MIDDLE of MB sequence!)</span>
<a name="l00733"></a>00733                                 } <span class="keywordflow">else</span> $outStr.=$chr;   <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. Transparent</span>
<a name="l00734"></a>00734                         }
<a name="l00735"></a>00735                         <span class="keywordflow">return</span> $outStr;
<a name="l00736"></a>00736                 }
<a name="l00737"></a>00737         }
<a name="l00738"></a>00738 
<a name="l00745"></a><a class="code" href="classt3lib__cs.html#4e8ebb826f8a7b5081e86556c9166a27">00745</a>         function utf8_to_entities($str) {
<a name="l00746"></a>00746                 $strLen = strlen($str);
<a name="l00747"></a>00747                 $outStr='';
<a name="l00748"></a>00748                 $buf='';
<a name="l00749"></a>00749                 <span class="keywordflow">for</span> ($a=0;$a&lt;$strLen;$a++)      {       <span class="comment">// Traverse each char in UTF-8 string.</span>
<a name="l00750"></a>00750                         $chr=substr($str,$a,1);
<a name="l00751"></a>00751                         $ord=ord($chr);
<a name="l00752"></a>00752                         <span class="keywordflow">if</span> ($ord&gt;127)   {       <span class="comment">// This means multibyte! (first byte!)</span>
<a name="l00753"></a>00753                                 <span class="keywordflow">if</span> ($ord &amp; 64)  {       <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span>
<a name="l00754"></a>00754                                         $buf=$chr;      <span class="comment">// Add first byte</span>
<a name="l00755"></a>00755                                         <span class="keywordflow">for</span> ($b=0;$b&lt;8;$b++)    {       <span class="comment">// for each byte in multibyte string...</span>
<a name="l00756"></a>00756                                                 $ord = $ord &lt;&lt; 1;       <span class="comment">// Shift it left and ...</span>
<a name="l00757"></a>00757                                                 <span class="keywordflow">if</span> ($ord &amp; 128) {       <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span>
<a name="l00758"></a>00758                                                         $a++;   <span class="comment">// Increase pointer...</span>
<a name="l00759"></a>00759                                                         $buf.=substr($str,$a,1);        <span class="comment">// ... and add the next char.</span>
<a name="l00760"></a>00760                                                 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>;
<a name="l00761"></a>00761                                         }
<a name="l00762"></a>00762 
<a name="l00763"></a>00763                                         $outStr.='&amp;#'.$this-&gt;utf8CharToUnumber($buf,1).<span class="charliteral">';'</span>;
<a name="l00764"></a>00764                                 } <span class="keywordflow">else</span> $outStr.=chr($this-&gt;noCharByteVal);      <span class="comment">// No char exists (MIDDLE of MB sequence!)</span>
<a name="l00765"></a>00765                         } <span class="keywordflow">else</span> $outStr.=$chr;   <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. Transparent</span>
<a name="l00766"></a>00766                 }
<a name="l00767"></a>00767 
<a name="l00768"></a>00768                 <span class="keywordflow">return</span> $outStr;
<a name="l00769"></a>00769         }
<a name="l00770"></a>00770 
<a name="l00778"></a><a class="code" href="classt3lib__cs.html#9030ca4a9d778d7fe214e6391e26634c">00778</a>         function entities_to_utf8($str,$alsoStdHtmlEnt=0)       {
<a name="l00779"></a>00779                 <span class="keywordflow">if</span> ($alsoStdHtmlEnt)    {
<a name="l00780"></a>00780                         $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES));             <span class="comment">// Getting them in iso-8859-1 - but that's OK since they are re-encoded from iso-8859-1 below.</span>
<a name="l00781"></a>00781                 }
<a name="l00782"></a>00782 
<a name="l00783"></a>00783                 $token = md5(microtime());
<a name="l00784"></a>00784                 $parts = explode($token,ereg_replace('(&amp;([#[:alnum:]]*);)',$token.<span class="charliteral">'\2'</span>.$token,$str));
<a name="l00785"></a>00785                 foreach($parts as $k =&gt; $v)     {
<a name="l00786"></a>00786                         <span class="keywordflow">if</span> ($k%2)       {
<a name="l00787"></a>00787                                 <span class="keywordflow">if</span> (substr($v,0,1)==<span class="charliteral">'#'</span>)        {       <span class="comment">// Dec or hex entities:</span>
<a name="l00788"></a>00788                                         <span class="keywordflow">if</span> (substr($v,1,1)==<span class="charliteral">'x'</span>)        {
<a name="l00789"></a>00789                                                 $parts[$k] = $this-&gt;UnumberToChar(hexdec(substr($v,2)));
<a name="l00790"></a>00790                                         } <span class="keywordflow">else</span> {
<a name="l00791"></a>00791                                                 $parts[$k] = $this-&gt;UnumberToChar(substr($v,1));
<a name="l00792"></a>00792                                         }
<a name="l00793"></a>00793                                 } elseif ($alsoStdHtmlEnt &amp;&amp; $trans_tbl[<span class="charliteral">'&amp;'</span>.$v.<span class="charliteral">';'</span>]) {  <span class="comment">// Other entities:</span>
<a name="l00794"></a>00794                                         $parts[$k] = $this-&gt;utf8_encode($trans_tbl[<span class="charliteral">'&amp;'</span>.$v.<span class="charliteral">';'</span>],'iso-8859-1');
<a name="l00795"></a>00795                                 } <span class="keywordflow">else</span> {        <span class="comment">// No conversion:</span>
<a name="l00796"></a>00796                                         $parts[$k] =<span class="charliteral">'&amp;'</span>.$v.<span class="charliteral">';'</span>;
<a name="l00797"></a>00797                                 }
<a name="l00798"></a>00798                         }
<a name="l00799"></a>00799                 }
<a name="l00800"></a>00800 
<a name="l00801"></a>00801                 <span class="keywordflow">return</span> implode('',$parts);
<a name="l00802"></a>00802         }
<a name="l00803"></a>00803 
<a name="l00812"></a><a class="code" href="classt3lib__cs.html#c8d3e880bb47b5211a1befc1b93eeef0">00812</a>         function utf8_to_numberarray($str,$convEntities=0,$retChar=0)   {
<a name="l00813"></a>00813                         <span class="comment">// If entities must be registered as well...:</span>
<a name="l00814"></a>00814                 <span class="keywordflow">if</span> ($convEntities)      {
<a name="l00815"></a>00815                         $str = $this-&gt;entities_to_utf8($str,1);
<a name="l00816"></a>00816                 }
<a name="l00817"></a>00817                         <span class="comment">// Do conversion:</span>
<a name="l00818"></a>00818                 $strLen = strlen($str);
<a name="l00819"></a>00819                 $outArr=array();
<a name="l00820"></a>00820                 $buf='';
<a name="l00821"></a>00821                 <span class="keywordflow">for</span> ($a=0;$a&lt;$strLen;$a++)      {       <span class="comment">// Traverse each char in UTF-8 string.</span>
<a name="l00822"></a>00822                         $chr=substr($str,$a,1);
<a name="l00823"></a>00823                         $ord=ord($chr);
<a name="l00824"></a>00824                         <span class="keywordflow">if</span> ($ord&gt;127)   {       <span class="comment">// This means multibyte! (first byte!)</span>
<a name="l00825"></a>00825                                 <span class="keywordflow">if</span> ($ord &amp; 64)  {       <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span>
<a name="l00826"></a>00826                                         $buf=$chr;      <span class="comment">// Add first byte</span>
<a name="l00827"></a>00827                                         <span class="keywordflow">for</span> ($b=0;$b&lt;8;$b++)    {       <span class="comment">// for each byte in multibyte string...</span>
<a name="l00828"></a>00828                                                 $ord = $ord &lt;&lt; 1;       <span class="comment">// Shift it left and ...</span>
<a name="l00829"></a>00829                                                 <span class="keywordflow">if</span> ($ord &amp; 128) {       <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span>
<a name="l00830"></a>00830                                                         $a++;   <span class="comment">// Increase pointer...</span>
<a name="l00831"></a>00831                                                         $buf.=substr($str,$a,1);        <span class="comment">// ... and add the next char.</span>
<a name="l00832"></a>00832                                                 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>;
<a name="l00833"></a>00833                                         }
<a name="l00834"></a>00834 
<a name="l00835"></a>00835                                         $outArr[]=$retChar?$buf:$this-&gt;utf8CharToUnumber($buf);
<a name="l00836"></a>00836                                 } <span class="keywordflow">else</span> $outArr[]=$retChar?chr($this-&gt;noCharByteVal):$this-&gt;noCharByteVal;       <span class="comment">// No char exists (MIDDLE of MB sequence!)</span>
<a name="l00837"></a>00837                         } <span class="keywordflow">else</span> $outArr[]=$retChar?chr($ord):$ord;       <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. Transparent</span>
<a name="l00838"></a>00838                 }
<a name="l00839"></a>00839 
<a name="l00840"></a>00840                 <span class="keywordflow">return</span> $outArr;
<a name="l00841"></a>00841         }
<a name="l00842"></a>00842 
<a name="l00862"></a><a class="code" href="classt3lib__cs.html#1248c2717104716a0e141236a8ade04b">00862</a>         function UnumberToChar($cbyte)  {
<a name="l00863"></a>00863                 $str='';
<a name="l00864"></a>00864 
<a name="l00865"></a>00865                 <span class="keywordflow">if</span> ($cbyte &lt; 0x80) {
<a name="l00866"></a>00866                         $str.=chr($cbyte);
<a name="l00867"></a>00867                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte &lt; 0x800) {
<a name="l00868"></a>00868                         $str.=chr(0xC0 | ($cbyte &gt;&gt; 6));
<a name="l00869"></a>00869                         $str.=chr(0x80 | ($cbyte &amp; 0x3F));
<a name="l00870"></a>00870                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte &lt; 0x10000) {
<a name="l00871"></a>00871                         $str.=chr(0xE0 | ($cbyte &gt;&gt; 12));
<a name="l00872"></a>00872                         $str.=chr(0x80 | (($cbyte &gt;&gt; 6) &amp; 0x3F));
<a name="l00873"></a>00873                         $str.=chr(0x80 | ($cbyte &amp; 0x3F));
<a name="l00874"></a>00874                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte &lt; 0x200000) {
<a name="l00875"></a>00875                         $str.=chr(0xF0 | ($cbyte &gt;&gt; 18));
<a name="l00876"></a>00876                         $str.=chr(0x80 | (($cbyte &gt;&gt; 12) &amp; 0x3F));
<a name="l00877"></a>00877                         $str.=chr(0x80 | (($cbyte &gt;&gt; 6) &amp; 0x3F));
<a name="l00878"></a>00878                         $str.=chr(0x80 | ($cbyte &amp; 0x3F));
<a name="l00879"></a>00879                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte &lt; 0x4000000) {
<a name="l00880"></a>00880                         $str.=chr(0xF8 | ($cbyte &gt;&gt; 24));
<a name="l00881"></a>00881                         $str.=chr(0x80 | (($cbyte &gt;&gt; 18) &amp; 0x3F));
<a name="l00882"></a>00882                         $str.=chr(0x80 | (($cbyte &gt;&gt; 12) &amp; 0x3F));
<a name="l00883"></a>00883                         $str.=chr(0x80 | (($cbyte &gt;&gt; 6) &amp; 0x3F));
<a name="l00884"></a>00884                         $str.=chr(0x80 | ($cbyte &amp; 0x3F));
<a name="l00885"></a>00885                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte &lt; 0x80000000) {
<a name="l00886"></a>00886                         $str.=chr(0xFC | ($cbyte &gt;&gt; 30));
<a name="l00887"></a>00887                         $str.=chr(0x80 | (($cbyte &gt;&gt; 24) &amp; 0x3F));
<a name="l00888"></a>00888                         $str.=chr(0x80 | (($cbyte &gt;&gt; 18) &amp; 0x3F));
<a name="l00889"></a>00889                         $str.=chr(0x80 | (($cbyte &gt;&gt; 12) &amp; 0x3F));
<a name="l00890"></a>00890                         $str.=chr(0x80 | (($cbyte &gt;&gt; 6) &amp; 0x3F));
<a name="l00891"></a>00891                         $str.=chr(0x80 | ($cbyte &amp; 0x3F));
<a name="l00892"></a>00892                 } <span class="keywordflow">else</span> { <span class="comment">// Cannot express a 32-bit character in UTF-8</span>
<a name="l00893"></a>00893                         $str .= chr($this-&gt;noCharByteVal);
<a name="l00894"></a>00894                 }
<a name="l00895"></a>00895                 <span class="keywordflow">return</span> $str;
<a name="l00896"></a>00896         }
<a name="l00897"></a>00897 
<a name="l00907"></a><a class="code" href="classt3lib__cs.html#9b8139f08c59ea65af04bcddecc7e98e">00907</a>         function utf8CharToUnumber($str,$hex=0) {
<a name="l00908"></a>00908                 $ord=ord(substr($str,0,1));     <span class="comment">// First char</span>
<a name="l00909"></a>00909 
<a name="l00910"></a>00910                 <span class="keywordflow">if</span> (($ord &amp; 192) == 192)        {       <span class="comment">// This verifies that it IS a multi byte string</span>
<a name="l00911"></a>00911                         $binBuf='';
<a name="l00912"></a>00912                         <span class="keywordflow">for</span> ($b=0;$b&lt;8;$b++)    {       <span class="comment">// for each byte in multibyte string...</span>
<a name="l00913"></a>00913                                 $ord = $ord &lt;&lt; 1;       <span class="comment">// Shift it left and ...</span>
<a name="l00914"></a>00914                                 <span class="keywordflow">if</span> ($ord &amp; 128) {       <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span>
<a name="l00915"></a>00915                                         $binBuf.=substr('00000000'.decbin(ord(substr($str,$b+1,1))),-6);
<a name="l00916"></a>00916                                 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>;
<a name="l00917"></a>00917                         }
<a name="l00918"></a>00918                         $binBuf=substr('00000000'.decbin(ord(substr($str,0,1))),-(6-$b)).$binBuf;
<a name="l00919"></a>00919 
<a name="l00920"></a>00920                         $int = bindec($binBuf);
<a name="l00921"></a>00921                 } <span class="keywordflow">else</span> $int = $ord;
<a name="l00922"></a>00922 
<a name="l00923"></a>00923                 <span class="keywordflow">return</span> $hex ? <span class="charliteral">'x'</span>.dechex($int) : $int;
<a name="l00924"></a>00924         }
<a name="l00925"></a>00925 
<a name="l00926"></a>00926 
<a name="l00927"></a>00927 
<a name="l00928"></a>00928 
<a name="l00929"></a>00929 
<a name="l00930"></a>00930 
<a name="l00931"></a>00931 
<a name="l00932"></a>00932 
<a name="l00933"></a>00933 
<a name="l00934"></a>00934         <span class="comment">/********************************************</span>
<a name="l00935"></a>00935 <span class="comment">         *</span>
<a name="l00936"></a>00936 <span class="comment">         * Init functions</span>
<a name="l00937"></a>00937 <span class="comment">         *</span>
<a name="l00938"></a>00938 <span class="comment">         ********************************************/</span>
<a name="l00939"></a>00939 
<a name="l00950"></a><a class="code" href="classt3lib__cs.html#630dd257265f8f9f0955d21cafee1e56">00950</a>         function initCharset($charset)  {
<a name="l00951"></a>00951                         <span class="comment">// Only process if the charset is not yet loaded:</span>
<a name="l00952"></a>00952                 <span class="keywordflow">if</span> (!is_array($this-&gt;parsedCharsets[$charset])) {
<a name="l00953"></a>00953 
<a name="l00954"></a>00954                                 <span class="comment">// Conversion table filename:</span>
<a name="l00955"></a>00955                         $charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl';
<a name="l00956"></a>00956 
<a name="l00957"></a>00957                                 <span class="comment">// If the conversion table is found:</span>
<a name="l00958"></a>00958                         <span class="keywordflow">if</span> ($charset &amp;&amp; <a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($charsetConvTableFile) &amp;&amp; @is_file($charsetConvTableFile))      {
<a name="l00959"></a>00959                                         <span class="comment">// Cache file for charsets:</span>
<a name="l00960"></a>00960                                         <span class="comment">// Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. For other charsets we are talking 11 ms down to zero.</span>
<a name="l00961"></a>00961                                 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/charset_'.$charset.'.tbl');
<a name="l00962"></a>00962                                 <span class="keywordflow">if</span> ($cacheFile &amp;&amp; @is_file($cacheFile)) {
<a name="l00963"></a>00963                                         $this-&gt;parsedCharsets[$charset]=unserialize(t3lib_div::getUrl($cacheFile));
<a name="l00964"></a>00964                                 } <span class="keywordflow">else</span> {
<a name="l00965"></a>00965                                                 <span class="comment">// Parse conversion table into lines:</span>
<a name="l00966"></a>00966                                         $lines=<a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(chr(10),t3lib_div::getUrl($charsetConvTableFile),1);
<a name="l00967"></a>00967                                                 <span class="comment">// Initialize the internal variable holding the conv. table:</span>
<a name="l00968"></a>00968                                         $this-&gt;parsedCharsets[$charset]=array('local'=&gt;array(),'utf8'=&gt;array());
<a name="l00969"></a>00969                                                 <span class="comment">// traverse the lines:</span>
<a name="l00970"></a>00970                                         $detectedType='';
<a name="l00971"></a>00971                                         foreach($lines as $value)       {
<a name="l00972"></a>00972                                                 <span class="keywordflow">if</span> (trim($value) &amp;&amp; substr($value,0,1)!=<span class="charliteral">'#'</span>)    {       <span class="comment">// Comment line or blanks are ignored.</span>
<a name="l00973"></a>00973 
<a name="l00974"></a>00974                                                                 <span class="comment">// Detect type if not done yet: (Done on first real line)</span>
<a name="l00975"></a>00975                                                                 <span class="comment">// The "whitespaced" type is on the syntax      "0x0A   0x000A  #LINE FEED"     while   "ms-token" is like              "B9 = U+00B9 : SUPERSCRIPT ONE"</span>
<a name="l00976"></a>00976                                                         <span class="keywordflow">if</span> (!$detectedType)             $detectedType = ereg('[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+',$value) ? 'whitespaced' : 'ms-token';
<a name="l00977"></a>00977 
<a name="l00978"></a>00978                                                         <span class="keywordflow">if</span> ($detectedType=='ms-token')  {
<a name="l00979"></a>00979                                                                 list($hexbyte,$utf8) = split('=|:',$value,3);
<a name="l00980"></a>00980                                                         } elseif ($detectedType=='whitespaced') {
<a name="l00981"></a>00981                                                                 $regA=array();
<a name="l00982"></a>00982                                                                 ereg('[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+',$value,$regA);
<a name="l00983"></a>00983                                                                 $hexbyte = $regA[1];
<a name="l00984"></a>00984                                                                 $utf8 = 'U+'.$regA[2];
<a name="l00985"></a>00985                                                         }
<a name="l00986"></a>00986                                                         $decval = hexdec(trim($hexbyte));
<a name="l00987"></a>00987                                                         <span class="keywordflow">if</span> ($decval&gt;127)        {
<a name="l00988"></a>00988                                                                 $utf8decval = hexdec(substr(trim($utf8),2));
<a name="l00989"></a>00989                                                                 $this-&gt;parsedCharsets[$charset]['local'][$decval]=$this-&gt;UnumberToChar($utf8decval);
<a name="l00990"></a>00990                                                                 $this-&gt;parsedCharsets[$charset]['utf8'][$this-&gt;parsedCharsets[$charset]['local'][$decval]]=$decval;
<a name="l00991"></a>00991                                                         }
<a name="l00992"></a>00992                                                 }
<a name="l00993"></a>00993                                         }
<a name="l00994"></a>00994                                         <span class="keywordflow">if</span> ($cacheFile) {
<a name="l00995"></a>00995                                                 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this-&gt;parsedCharsets[$charset]));
<a name="l00996"></a>00996                                         }
<a name="l00997"></a>00997                                 }
<a name="l00998"></a>00998                                 <span class="keywordflow">return</span> 2;
<a name="l00999"></a>00999                         } <span class="keywordflow">else</span> <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01000"></a>01000                 } <span class="keywordflow">else</span> <span class="keywordflow">return</span> 1;
<a name="l01001"></a>01001         }
<a name="l01002"></a>01002 
<a name="l01012"></a><a class="code" href="classt3lib__cs.html#342924e5872abb1227910d97556e5f48">01012</a>         function initUnicodeData($mode=null)    {
<a name="l01013"></a>01013                         <span class="comment">// cache files</span>
<a name="l01014"></a>01014                 $cacheFileCase = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/cscase_utf-8.tbl');
<a name="l01015"></a>01015                 $cacheFileASCII = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/csascii_utf-8.tbl');
<a name="l01016"></a>01016 
<a name="l01017"></a>01017                         <span class="comment">// Only process if the tables are not yet loaded</span>
<a name="l01018"></a>01018                 <span class="keywordflow">switch</span>($mode)   {
<a name="l01019"></a>01019                         <span class="keywordflow">case</span> '<span class="keywordflow">case</span>':
<a name="l01020"></a>01020                                 <span class="keywordflow">if</span> (is_array($this-&gt;caseFolding['utf-8']))      <span class="keywordflow">return</span> 1;
<a name="l01021"></a>01021 
<a name="l01022"></a>01022                                         <span class="comment">// Use cached version if possible</span>
<a name="l01023"></a>01023                                 <span class="keywordflow">if</span> ($cacheFileCase &amp;&amp; @is_file($cacheFileCase)) {
<a name="l01024"></a>01024                                         $this-&gt;caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase));
<a name="l01025"></a>01025                                         <span class="keywordflow">return</span> 2;
<a name="l01026"></a>01026                                 }
<a name="l01027"></a>01027                                 <span class="keywordflow">break</span>;
<a name="l01028"></a>01028 
<a name="l01029"></a>01029                         <span class="keywordflow">case</span> 'ascii':
<a name="l01030"></a>01030                                 <span class="keywordflow">if</span> (is_array($this-&gt;toASCII['utf-8']))  <span class="keywordflow">return</span> 1;
<a name="l01031"></a>01031 
<a name="l01032"></a>01032                                         <span class="comment">// Use cached version if possible</span>
<a name="l01033"></a>01033                                 <span class="keywordflow">if</span> ($cacheFileASCII &amp;&amp; @is_file($cacheFileASCII))       {
<a name="l01034"></a>01034                                         $this-&gt;toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII));
<a name="l01035"></a>01035                                         <span class="keywordflow">return</span> 2;
<a name="l01036"></a>01036                                 }
<a name="l01037"></a>01037                                 <span class="keywordflow">break</span>;
<a name="l01038"></a>01038                 }
<a name="l01039"></a>01039 
<a name="l01040"></a>01040                         <span class="comment">// process main Unicode data file</span>
<a name="l01041"></a>01041                 $unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt';
<a name="l01042"></a>01042                 <span class="keywordflow">if</span> (!(<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($unicodeDataFile) &amp;&amp; @is_file($unicodeDataFile))) <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01043"></a>01043 
<a name="l01044"></a>01044                 $fh = fopen($unicodeDataFile,'rb');
<a name="l01045"></a>01045                 <span class="keywordflow">if</span> (!$fh)       <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01046"></a>01046 
<a name="l01047"></a>01047                         <span class="comment">// key = utf8 char (single codepoint), value = utf8 string (codepoint sequence)</span>
<a name="l01048"></a>01048                         <span class="comment">// note: we use the UTF-8 characters here and not the Unicode numbers to avoid a conversion roundtrip in utf8_strtolower/-upper</span>
<a name="l01049"></a>01049                 $this-&gt;caseFolding['utf-8'] = array();
<a name="l01050"></a>01050                 $utf8CaseFolding =&amp; $this-&gt;caseFolding['utf-8']; <span class="comment">// a shorthand</span>
<a name="l01051"></a>01051                 $utf8CaseFolding['toUpper'] = array();
<a name="l01052"></a>01052                 $utf8CaseFolding['toLower'] = array();
<a name="l01053"></a>01053                 $utf8CaseFolding['toTitle'] = array();
<a name="l01054"></a>01054 
<a name="l01055"></a>01055                 $decomposition = array();       <span class="comment">// array of temp. decompositions</span>
<a name="l01056"></a>01056                 $mark = array();                <span class="comment">// array of chars that are marks (eg. composing accents)</span>
<a name="l01057"></a>01057                 $number = array();              <span class="comment">// array of chars that are numbers (eg. digits)</span>
<a name="l01058"></a>01058                 $omit = array();                <span class="comment">// array of chars to be omitted (eg. Russian hard sign)</span>
<a name="l01059"></a>01059 
<a name="l01060"></a>01060                 <span class="keywordflow">while</span> (!feof($fh))      {
<a name="l01061"></a>01061                         $line = fgets($fh,4096);
<a name="l01062"></a>01062                                 <span class="comment">// has a lot of info</span>
<a name="l01063"></a>01063                         list($char,$name,$cat,,,$decomp,,,$num,,,,$upper,$lower,$title,) = split(<span class="charliteral">';'</span>, rtrim($line));
<a name="l01064"></a>01064 
<a name="l01065"></a>01065                         $ord = hexdec($char);
<a name="l01066"></a>01066                         <span class="keywordflow">if</span> ($ord &gt; 0xFFFF)      <span class="keywordflow">break</span>;  <span class="comment">// only process the BMP</span>
<a name="l01067"></a>01067 
<a name="l01068"></a>01068                         $utf8_char = $this-&gt;UnumberToChar($ord);
<a name="l01069"></a>01069 
<a name="l01070"></a>01070                         <span class="keywordflow">if</span> ($upper)     $utf8CaseFolding['toUpper'][$utf8_char] = $this-&gt;UnumberToChar(hexdec($upper));
<a name="l01071"></a>01071                         <span class="keywordflow">if</span> ($lower)     $utf8CaseFolding['toLower'][$utf8_char] = $this-&gt;UnumberToChar(hexdec($lower));
<a name="l01072"></a>01072                                 <span class="comment">// store "title" only when different from "upper" (only a few)</span>
<a name="l01073"></a>01073                         <span class="keywordflow">if</span> ($title &amp;&amp; $title != $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this-&gt;UnumberToChar(hexdec($title));
<a name="l01074"></a>01074 
<a name="l01075"></a>01075                         <span class="keywordflow">switch</span> ($cat{0})        {
<a name="l01076"></a>01076                                 <span class="keywordflow">case</span> <span class="charliteral">'M'</span>:       <span class="comment">// mark (accent, umlaut, ...)</span>
<a name="l01077"></a>01077                                         $mark[<span class="stringliteral">"U+$char"</span>] = 1;
<a name="l01078"></a>01078                                         <span class="keywordflow">break</span>;
<a name="l01079"></a>01079 
<a name="l01080"></a>01080                                 <span class="keywordflow">case</span> <span class="charliteral">'N'</span>:       <span class="comment">// numeric value</span>
<a name="l01081"></a>01081                                         <span class="keywordflow">if</span> ($ord &gt; 0x80 &amp;&amp; $num != '')  $number[<span class="stringliteral">"U+$char"</span>] = $num;
<a name="l01082"></a>01082                         }
<a name="l01083"></a>01083 
<a name="l01084"></a>01084                                 <span class="comment">// accented Latin letters without "official" decomposition</span>
<a name="l01085"></a>01085                         $match = array();
<a name="l01086"></a>01086                         <span class="keywordflow">if</span> (ereg('^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH',$name,$match) &amp;&amp; !$decomp)        {
<a name="l01087"></a>01087                                 $c = ord($match[2]);
<a name="l01088"></a>01088                                 <span class="keywordflow">if</span> ($match[1] == 'SMALL')       $c += 32;
<a name="l01089"></a>01089 
<a name="l01090"></a>01090                                 $decomposition[<span class="stringliteral">"U+$char"</span>] = array(dechex($c));
<a name="l01091"></a>01091                                 <span class="keywordflow">continue</span>;
<a name="l01092"></a>01092                         }
<a name="l01093"></a>01093 
<a name="l01094"></a>01094                         $match = array();
<a name="l01095"></a>01095                         <span class="keywordflow">if</span> (ereg('(&lt;.*&gt;)? *(.+)',$decomp,$match))       {
<a name="l01096"></a>01096                                 <span class="keywordflow">switch</span>($match[1])       {
<a name="l01097"></a>01097                                         <span class="keywordflow">case</span> '&lt;circle&gt;':        <span class="comment">// add parentheses as circle replacement, e.g. (1)</span>
<a name="l01098"></a>01098                                                 $match[2] = '0028 '.$match[2].' 0029';
<a name="l01099"></a>01099                                                 <span class="keywordflow">break</span>;
<a name="l01100"></a>01100 
<a name="l01101"></a>01101                                         <span class="keywordflow">case</span> '&lt;square&gt;':        <span class="comment">// add square brackets as square replacement, eg [1]</span>
<a name="l01102"></a>01102                                                 $match[2] = '005B '.$match[2].' 005D';
<a name="l01103"></a>01103                                                 <span class="keywordflow">break</span>;
<a name="l01104"></a>01104 
<a name="l01105"></a>01105                                         <span class="keywordflow">case</span> '&lt;compat&gt;':        <span class="comment">// ignore multi char decompositions that start with a space</span>
<a name="l01106"></a>01106                                                 <span class="keywordflow">if</span> (ereg('^0020 ',$match[2]))   <span class="keywordflow">continue</span> 2;
<a name="l01107"></a>01107                                                 <span class="keywordflow">break</span>;
<a name="l01108"></a>01108 
<a name="l01109"></a>01109                                                 <span class="comment">// ignore Arabic and vertical layout presentation decomposition</span>
<a name="l01110"></a>01110                                         <span class="keywordflow">case</span> '&lt;initial&gt;':
<a name="l01111"></a>01111                                         <span class="keywordflow">case</span> '&lt;medial&gt;':
<a name="l01112"></a>01112                                         <span class="keywordflow">case</span> '&lt;<span class="keyword">final</span>&gt;':
<a name="l01113"></a>01113                                         <span class="keywordflow">case</span> '&lt;isolated&gt;':
<a name="l01114"></a>01114                                         <span class="keywordflow">case</span> '&lt;vertical&gt;':
<a name="l01115"></a>01115                                                 <span class="keywordflow">continue</span> 2;
<a name="l01116"></a>01116                                 }
<a name="l01117"></a>01117                                 $decomposition[<span class="stringliteral">"U+$char"</span>] = split(<span class="charliteral">' '</span>,$match[2]);
<a name="l01118"></a>01118                         }
<a name="l01119"></a>01119                 }
<a name="l01120"></a>01120                 fclose($fh);
<a name="l01121"></a>01121 
<a name="l01122"></a>01122                         <span class="comment">// process additional Unicode data for casing (allow folded characters to expand into a sequence)</span>
<a name="l01123"></a>01123                 $specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt';
<a name="l01124"></a>01124                 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($specialCasingFile) &amp;&amp; @is_file($specialCasingFile))        {
<a name="l01125"></a>01125                         $fh = fopen($specialCasingFile,'rb');
<a name="l01126"></a>01126                         <span class="keywordflow">if</span> ($fh)        {
<a name="l01127"></a>01127                                 <span class="keywordflow">while</span> (!feof($fh))      {
<a name="l01128"></a>01128                                         $line = fgets($fh,4096);
<a name="l01129"></a>01129                                         <span class="keywordflow">if</span> ($line{0} != <span class="charliteral">'#'</span> &amp;&amp; trim($line) != '')       {
<a name="l01130"></a>01130 
<a name="l01131"></a>01131                                                 list($char,$lower,$title,$upper,$cond) = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">';'</span>, $line);
<a name="l01132"></a>01132                                                 <span class="keywordflow">if</span> ($cond == '' || $cond{0} == <span class="charliteral">'#'</span>)     {
<a name="l01133"></a>01133                                                         $utf8_char = $this-&gt;UnumberToChar(hexdec($char));
<a name="l01134"></a>01134                                                         <span class="keywordflow">if</span> ($char != $lower)    {
<a name="l01135"></a>01135                                                                 $arr = split(<span class="charliteral">' '</span>,$lower);
<a name="l01136"></a>01136                                                                 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++)       $arr[$i] = $this-&gt;UnumberToChar(hexdec($arr[$i]));
<a name="l01137"></a>01137                                                                 $utf8CaseFolding['toLower'][$utf8_char] = implode('',$arr);
<a name="l01138"></a>01138                                                         }
<a name="l01139"></a>01139                                                         <span class="keywordflow">if</span> ($char != $title &amp;&amp; $title != $upper)        {
<a name="l01140"></a>01140                                                                 $arr = split(<span class="charliteral">' '</span>,$title);
<a name="l01141"></a>01141                                                                 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++)       $arr[$i] = $this-&gt;UnumberToChar(hexdec($arr[$i]));
<a name="l01142"></a>01142                                                                 $utf8CaseFolding['toTitle'][$utf8_char] = implode('',$arr);
<a name="l01143"></a>01143                                                         }
<a name="l01144"></a>01144                                                         <span class="keywordflow">if</span> ($char != $upper)    {
<a name="l01145"></a>01145                                                                         $arr = split(<span class="charliteral">' '</span>,$upper);
<a name="l01146"></a>01146                                                                 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++)       $arr[$i] = $this-&gt;UnumberToChar(hexdec($arr[$i]));
<a name="l01147"></a>01147                                                                 $utf8CaseFolding['toUpper'][$utf8_char] = implode('',$arr);
<a name="l01148"></a>01148                                                         }
<a name="l01149"></a>01149                                                 }
<a name="l01150"></a>01150                                         }
<a name="l01151"></a>01151                                 }
<a name="l01152"></a>01152                                 fclose($fh);
<a name="l01153"></a>01153                         }
<a name="l01154"></a>01154                 }
<a name="l01155"></a>01155 
<a name="l01156"></a>01156                         <span class="comment">// process custom decompositions</span>
<a name="l01157"></a>01157                 $customTranslitFile = PATH_t3lib.'unidata/Translit.txt';
<a name="l01158"></a>01158                 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($customTranslitFile) &amp;&amp; @is_file($customTranslitFile))      {
<a name="l01159"></a>01159                         $fh = fopen($customTranslitFile,'rb');
<a name="l01160"></a>01160                         <span class="keywordflow">if</span> ($fh)        {
<a name="l01161"></a>01161                                 <span class="keywordflow">while</span> (!feof($fh))      {
<a name="l01162"></a>01162                                         $line = fgets($fh,4096);
<a name="l01163"></a>01163                                         <span class="keywordflow">if</span> ($line{0} != <span class="charliteral">'#'</span> &amp;&amp; trim($line) != '')       {
<a name="l01164"></a>01164                                                 list($char,$translit) = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">';'</span>, $line);
<a name="l01165"></a>01165                                                 <span class="keywordflow">if</span> (!$translit) $omit[<span class="stringliteral">"U+$char"</span>] = 1;
<a name="l01166"></a>01166                                                 $decomposition[<span class="stringliteral">"U+$char"</span>] = split(<span class="charliteral">' '</span>, $translit);
<a name="l01167"></a>01167 
<a name="l01168"></a>01168                                         }
<a name="l01169"></a>01169                                 }
<a name="l01170"></a>01170                                 fclose($fh);
<a name="l01171"></a>01171                         }
<a name="l01172"></a>01172                 }
<a name="l01173"></a>01173 
<a name="l01174"></a>01174                         <span class="comment">// decompose and remove marks; inspired by unac (Loic Dachary &lt;loic@senga.org&gt;)</span>
<a name="l01175"></a>01175                 foreach($decomposition as $from =&gt; $to) {
<a name="l01176"></a>01176                         $code_decomp = array();
<a name="l01177"></a>01177 
<a name="l01178"></a>01178                         <span class="keywordflow">while</span> ($code_value = array_shift($to))  {
<a name="l01179"></a>01179                                 <span class="keywordflow">if</span> (isset($decomposition[<span class="stringliteral">"U+$code_value"</span>]))     {       <span class="comment">// do recursive decomposition</span>
<a name="l01180"></a>01180                                         foreach(array_reverse($decomposition[<span class="stringliteral">"U+$code_value"</span>]) as $cv)  {
<a name="l01181"></a>01181                                                 array_unshift($to, $cv);
<a name="l01182"></a>01182                                         }
<a name="l01183"></a>01183                                 } elseif (!isset($mark[<span class="stringliteral">"U+$code_value"</span>])) {     <span class="comment">// remove mark</span>
<a name="l01184"></a>01184                                         array_push($code_decomp, $code_value);
<a name="l01185"></a>01185                                 }
<a name="l01186"></a>01186                         }
<a name="l01187"></a>01187                         <span class="keywordflow">if</span> (count($code_decomp) || isset($omit[$from])) {
<a name="l01188"></a>01188                                 $decomposition[$from] = $code_decomp;
<a name="l01189"></a>01189                         } <span class="keywordflow">else</span> {
<a name="l01190"></a>01190                                 unset($decomposition[$from]);
<a name="l01191"></a>01191                         }
<a name="l01192"></a>01192                 }
<a name="l01193"></a>01193 
<a name="l01194"></a>01194                         <span class="comment">// create ascii only mapping</span>
<a name="l01195"></a>01195                 $this-&gt;toASCII['utf-8'] = array();
<a name="l01196"></a>01196                 $ascii =&amp; $this-&gt;toASCII['utf-8'];
<a name="l01197"></a>01197 
<a name="l01198"></a>01198                 foreach($decomposition as $from =&gt; $to) {
<a name="l01199"></a>01199                         $code_decomp = array();
<a name="l01200"></a>01200                         <span class="keywordflow">while</span> ($code_value = array_shift($to))  {
<a name="l01201"></a>01201                                 $ord = hexdec($code_value);
<a name="l01202"></a>01202                                 <span class="keywordflow">if</span> ($ord &gt; 127)
<a name="l01203"></a>01203                                         <span class="keywordflow">continue</span> 2;     <span class="comment">// skip decompositions containing non-ASCII chars</span>
<a name="l01204"></a>01204                                 <span class="keywordflow">else</span>
<a name="l01205"></a>01205                                         array_push($code_decomp,chr($ord));
<a name="l01206"></a>01206                         }
<a name="l01207"></a>01207                         $ascii[$this-&gt;UnumberToChar(hexdec($from))] = join('',$code_decomp);
<a name="l01208"></a>01208                 }
<a name="l01209"></a>01209 
<a name="l01210"></a>01210                         <span class="comment">// add numeric decompositions</span>
<a name="l01211"></a>01211                 foreach($number as $from =&gt; $to)        {
<a name="l01212"></a>01212                         $utf8_char = $this-&gt;UnumberToChar(hexdec($from));
<a name="l01213"></a>01213                         <span class="keywordflow">if</span> (!isset($ascii[$utf8_char])) {
<a name="l01214"></a>01214                                 $ascii[$utf8_char] = $to;
<a name="l01215"></a>01215                         }
<a name="l01216"></a>01216                 }
<a name="l01217"></a>01217 
<a name="l01218"></a>01218                 <span class="keywordflow">if</span> ($cacheFileCase)     {
<a name="l01219"></a>01219                                 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFileCase,serialize($utf8CaseFolding));
<a name="l01220"></a>01220                 }
<a name="l01221"></a>01221 
<a name="l01222"></a>01222                 <span class="keywordflow">if</span> ($cacheFileASCII)    {
<a name="l01223"></a>01223                                 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFileASCII,serialize($ascii));
<a name="l01224"></a>01224                 }
<a name="l01225"></a>01225 
<a name="l01226"></a>01226                 <span class="keywordflow">return</span> 3;
<a name="l01227"></a>01227         }
<a name="l01228"></a>01228 
<a name="l01237"></a><a class="code" href="classt3lib__cs.html#68f8c921a75e9928f7726d5438c71405">01237</a>         function initCaseFolding($charset)      {       <span class="comment">// Builds $this-&gt;caseFolding[$charset]; returns 1 if already loaded, 2 if read from the cache file, 3 if freshly built, false if initCharset()/initUnicodeData() failed</span>
<a name="l01238"></a>01238                         <span class="comment">// Only process if the case table is not yet loaded:</span>
<a name="l01239"></a>01239                 <span class="keywordflow">if</span> (is_array($this-&gt;caseFolding[$charset]))     <span class="keywordflow">return</span> 1;
<a name="l01240"></a>01240 
<a name="l01241"></a>01241                         <span class="comment">// Use cached version if possible</span>
<a name="l01242"></a>01242                 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/cscase_'.$charset.'.tbl');
<a name="l01243"></a>01243                 <span class="keywordflow">if</span> ($cacheFile &amp;&amp; @is_file($cacheFile)) {
<a name="l01244"></a>01244                         $this-&gt;caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile));      <span class="comment">// NOTE(review): unserialize() trusts whatever is stored in the cache file</span>
<a name="l01245"></a>01245                         <span class="keywordflow">return</span> 2;
<a name="l01246"></a>01246                 }
<a name="l01247"></a>01247 
<a name="l01248"></a>01248                         <span class="comment">// init UTF-8 conversion for this charset</span>
<a name="l01249"></a>01249                 <span class="keywordflow">if</span> (!$this-&gt;initCharset($charset))      {
<a name="l01250"></a>01250                         <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01251"></a>01251                 }
<a name="l01252"></a>01252 
<a name="l01253"></a>01253                         <span class="comment">// UTF-8 case folding is used as the base conversion table</span>
<a name="l01254"></a>01254                 <span class="keywordflow">if</span> (!$this-&gt;initUnicodeData('<span class="keywordflow">case</span>'))    {
<a name="l01255"></a>01255                         <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01256"></a>01256                 }
<a name="l01257"></a>01257 
<a name="l01258"></a>01258                 $nochar = chr($this-&gt;noCharByteVal);    <span class="comment">// presumably the placeholder byte utf8_decode() emits for unmappable characters - TODO confirm</span>
<a name="l01259"></a>01259                 foreach ($this-&gt;parsedCharsets[$charset]['local'] as $ci =&gt; $utf8)      {       <span class="comment">// $ci is not used below; only the UTF-8 form of each entry is needed</span>
<a name="l01260"></a>01260                                 <span class="comment">// reconvert to charset (don't use chr() of numeric value, might be multi-byte)</span>
<a name="l01261"></a>01261                         $c = $this-&gt;utf8_decode($utf8, $charset);
<a name="l01262"></a>01262 
<a name="l01263"></a>01263                                 <span class="comment">// $cc = $this-&gt;conv($this-&gt;caseFolding['utf-8']['toUpper'][$utf8], 'utf-8', $charset);</span>
<a name="l01264"></a>01264                         $cc = $this-&gt;utf8_decode($this-&gt;caseFolding['utf-8']['toUpper'][$utf8], $charset);
<a name="l01265"></a>01265                         <span class="keywordflow">if</span> ($cc != '' &amp;&amp; $cc != $nochar)        $this-&gt;caseFolding[$charset]['toUpper'][$c] = $cc;      <span class="comment">// keep only mappings that produced a real character in $charset</span>
<a name="l01266"></a>01266 
<a name="l01267"></a>01267                                 <span class="comment">// $cc = $this-&gt;conv($this-&gt;caseFolding['utf-8']['toLower'][$utf8], 'utf-8', $charset);</span>
<a name="l01268"></a>01268                         $cc = $this-&gt;utf8_decode($this-&gt;caseFolding['utf-8']['toLower'][$utf8], $charset);
<a name="l01269"></a>01269                         <span class="keywordflow">if</span> ($cc != '' &amp;&amp; $cc != $nochar)        $this-&gt;caseFolding[$charset]['toLower'][$c] = $cc;
<a name="l01270"></a>01270 
<a name="l01271"></a>01271                                 <span class="comment">// $cc = $this-&gt;conv($this-&gt;caseFolding['utf-8']['toTitle'][$utf8], 'utf-8', $charset);</span>
<a name="l01272"></a>01272                         $cc = $this-&gt;utf8_decode($this-&gt;caseFolding['utf-8']['toTitle'][$utf8], $charset);
<a name="l01273"></a>01273                         <span class="keywordflow">if</span> ($cc != '' &amp;&amp; $cc != $nochar)        $this-&gt;caseFolding[$charset]['toTitle'][$c] = $cc;
<a name="l01274"></a>01274                 }
<a name="l01275"></a>01275 
<a name="l01276"></a>01276                         <span class="comment">// add the ASCII case table</span>
<a name="l01277"></a>01277                 <span class="keywordflow">for</span> ($i=ord(<span class="charliteral">'a'</span>); $i&lt;=ord(<span class="charliteral">'z'</span>); $i++)   {
<a name="l01278"></a>01278                         $this-&gt;caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32);  <span class="comment">// ASCII upper case letters sit 32 below their lower case forms</span>
<a name="l01279"></a>01279                 }
<a name="l01280"></a>01280                 <span class="keywordflow">for</span> ($i=ord(<span class="charliteral">'A'</span>); $i&lt;=ord(<span class="charliteral">'Z'</span>); $i++)   {
<a name="l01281"></a>01281                         $this-&gt;caseFolding[$charset]['toLower'][chr($i)] = chr($i+32);
<a name="l01282"></a>01282                 }
<a name="l01283"></a>01283 
<a name="l01284"></a>01284                 <span class="keywordflow">if</span> ($cacheFile) {
<a name="l01285"></a>01285                                 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this-&gt;caseFolding[$charset]));
<a name="l01286"></a>01286                 }
<a name="l01287"></a>01287 
<a name="l01288"></a>01288                 <span class="keywordflow">return</span> 3;
<a name="l01289"></a>01289         }
<a name="l01290"></a>01290 
<a name="l01299"></a><a class="code" href="classt3lib__cs.html#17dabff9f5cb2162b4c1b0b1d9ac066f">01299</a>         function initToASCII($charset)  {       <span class="comment">// Builds $this-&gt;toASCII[$charset]; returns 1 if already loaded, 2 if read from the cache file, 3 if freshly built, false if initCharset()/initUnicodeData() failed</span>
<a name="l01300"></a>01300                         <span class="comment">// Only process if the ASCII table is not yet loaded:</span>
<a name="l01301"></a>01301                 <span class="keywordflow">if</span> (is_array($this-&gt;toASCII[$charset])) <span class="keywordflow">return</span> 1;
<a name="l01302"></a>01302 
<a name="l01303"></a>01303                         <span class="comment">// Use cached version if possible</span>
<a name="l01304"></a>01304                 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/csascii_'.$charset.'.tbl');
<a name="l01305"></a>01305                 <span class="keywordflow">if</span> ($cacheFile &amp;&amp; @is_file($cacheFile)) {
<a name="l01306"></a>01306                         $this-&gt;toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile));  <span class="comment">// NOTE(review): unserialize() trusts whatever is stored in the cache file</span>
<a name="l01307"></a>01307                         <span class="keywordflow">return</span> 2;
<a name="l01308"></a>01308                 }
<a name="l01309"></a>01309 
<a name="l01310"></a>01310                         <span class="comment">// init UTF-8 conversion for this charset</span>
<a name="l01311"></a>01311                 <span class="keywordflow">if</span> (!$this-&gt;initCharset($charset))      {
<a name="l01312"></a>01312                         <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01313"></a>01313                 }
<a name="l01314"></a>01314 
<a name="l01315"></a>01315                         <span class="comment">// UTF-8/ASCII transliteration is used as the base conversion table</span>
<a name="l01316"></a>01316                 <span class="keywordflow">if</span> (!$this-&gt;initUnicodeData('ascii'))   {
<a name="l01317"></a>01317                         <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l01318"></a>01318                 }
<a name="l01319"></a>01319 
<a name="l01320"></a>01320                 $nochar = chr($this-&gt;noCharByteVal);    <span class="comment">// NOTE(review): $nochar is assigned but never used in this method</span>
<a name="l01321"></a>01321                 foreach ($this-&gt;parsedCharsets[$charset]['local'] as $ci =&gt; $utf8)      {       <span class="comment">// $ci is not used below; only the UTF-8 form of each entry is needed</span>
<a name="l01322"></a>01322                                 <span class="comment">// reconvert to charset (don't use chr() of numeric value, might be multi-byte)</span>
<a name="l01323"></a>01323                         $c = $this-&gt;utf8_decode($utf8, $charset);
<a name="l01324"></a>01324 
<a name="l01325"></a>01325                         <span class="keywordflow">if</span> (isset($this-&gt;toASCII['utf-8'][$utf8]))      {       <span class="comment">// copy the UTF-8 transliteration, keyed by the character in $charset encoding</span>
<a name="l01326"></a>01326                                 $this-&gt;toASCII[$charset][$c] = $this-&gt;toASCII['utf-8'][$utf8];
<a name="l01327"></a>01327                         }
<a name="l01328"></a>01328                 }
<a name="l01329"></a>01329 
<a name="l01330"></a>01330                 <span class="keywordflow">if</span> ($cacheFile) {
<a name="l01331"></a>01331                                 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this-&gt;toASCII[$charset]));
<a name="l01332"></a>01332                 }
<a name="l01333"></a>01333 
<a name="l01334"></a>01334                 <span class="keywordflow">return</span> 3;
<a name="l01335"></a>01335         }
<a name="l01336"></a>01336 
<a name="l01337"></a>01337 
<a name="l01338"></a>01338 
<a name="l01339"></a>01339 
<a name="l01340"></a>01340 
<a name="l01341"></a>01341 
<a name="l01342"></a>01342 
<a name="l01343"></a>01343 
<a name="l01344"></a>01344 
<a name="l01345"></a>01345 
<a name="l01346"></a>01346 
<a name="l01347"></a>01347 
<a name="l01348"></a>01348 
<a name="l01349"></a>01349 
<a name="l01350"></a>01350 
<a name="l01351"></a>01351 
<a name="l01352"></a>01352         <span class="comment">/********************************************</span>
<a name="l01353"></a>01353 <span class="comment">         *</span>
<a name="l01354"></a>01354 <span class="comment">         * String operation functions</span>
<a name="l01355"></a>01355 <span class="comment">         *</span>
<a name="l01356"></a>01356 <span class="comment">         ********************************************/</span>
<a name="l01357"></a>01357 
<a name="l01370"></a><a class="code" href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">01370</a>         function substr($charset,$string,$start,$len=null)      {       <span class="comment">// Returns the part of $string (encoded in $charset) starting at character $start, $len characters long; an omitted $len means "to the end of the string"</span>
<a name="l01371"></a>01371                 <span class="keywordflow">if</span> ($len===0)   <span class="keywordflow">return</span> '';
<a name="l01372"></a>01372 
<a name="l01373"></a>01373                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
<a name="l01374"></a>01374                                 <span class="comment">// cannot omit $len, when specifying charset</span>
<a name="l01375"></a>01375                         <span class="keywordflow">if</span> ($len==null) {
<a name="l01376"></a>01376                                 $enc = mb_internal_encoding();  <span class="comment">// save internal encoding</span>
<a name="l01377"></a>01377                                 mb_internal_encoding($charset);
<a name="l01378"></a>01378                                 $str = mb_substr($string,$start);
<a name="l01379"></a>01379                                 mb_internal_encoding($enc);     <span class="comment">// restore internal encoding</span>
<a name="l01380"></a>01380 
<a name="l01381"></a>01381                                 <span class="keywordflow">return</span> $str;
<a name="l01382"></a>01382                         }
<a name="l01383"></a>01383                         <span class="keywordflow">else</span> {
<a name="l01384"></a>01384                                 <span class="keywordflow">return</span> mb_substr($string,$start,$len,$charset);
<a name="l01385"></a>01385                         }
<a name="l01386"></a>01386                 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv')      {
<a name="l01387"></a>01387                                 <span class="comment">// cannot omit $len, when specifying charset</span>
<a name="l01388"></a>01388                         <span class="keywordflow">if</span> ($len==null) {
<a name="l01389"></a>01389                                 $enc = iconv_get_encoding('internal_encoding'); <span class="comment">// save internal encoding</span>
<a name="l01390"></a>01390                                 iconv_set_encoding('internal_encoding',$charset);
<a name="l01391"></a>01391                                 $str = iconv_substr($string,$start);
<a name="l01392"></a>01392                                 iconv_set_encoding('internal_encoding',$enc);   <span class="comment">// restore internal encoding</span>
<a name="l01393"></a>01393 
<a name="l01394"></a>01394                                 <span class="keywordflow">return</span> $str;
<a name="l01395"></a>01395                         }
<a name="l01396"></a>01396                         <span class="keywordflow">else</span> {
<a name="l01397"></a>01397                                 <span class="keywordflow">return</span> iconv_substr($string,$start,$len,$charset);
<a name="l01398"></a>01398                         }
<a name="l01399"></a>01399                 } elseif ($charset == 'utf-8')  {
<a name="l01400"></a>01400                         <span class="keywordflow">return</span> $this-&gt;utf8_substr($string,$start,$len);
<a name="l01401"></a>01401                 } elseif ($this-&gt;eucBasedSets[$charset])        {
<a name="l01402"></a>01402                         <span class="keywordflow">return</span> $this-&gt;euc_substr($string,$start,$charset,$len);
<a name="l01403"></a>01403                 } elseif ($this-&gt;twoByteSets[$charset]) {
<a name="l01404"></a>01404                         <span class="keywordflow">return</span> $len === NULL ? substr($string,$start*2) : substr($string,$start*2,$len*2);      <span class="comment">// two bytes per character; without the NULL check $len*2 would be 0 and an empty string returned</span>
<a name="l01405"></a>01405                 } elseif ($this-&gt;fourByteSets[$charset])        {
<a name="l01406"></a>01406                         <span class="keywordflow">return</span> $len === NULL ? substr($string,$start*4) : substr($string,$start*4,$len*4);      <span class="comment">// four bytes per character; same NULL handling as above</span>
<a name="l01407"></a>01407                 }
<a name="l01408"></a>01408 
<a name="l01409"></a>01409                 <span class="comment">// treat everything else as single-byte encoding</span>
<a name="l01410"></a>01410                 <span class="keywordflow">return</span> $len === NULL ? substr($string,$start) : substr($string,$start,$len);
<a name="l01411"></a>01411         }
<a name="l01412"></a>01412 
<a name="l01423"></a><a class="code" href="classt3lib__cs.html#ba3dbbe621b02266e154ea0dfa15247a">01423</a>         function strlen($charset,$string)       {       <span class="comment">// Returns the number of characters (not bytes) in $string, which is encoded in $charset</span>
<a name="l01424"></a>01424                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
<a name="l01425"></a>01425                         <span class="keywordflow">return</span> mb_strlen($string,$charset);
<a name="l01426"></a>01426                 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv')      {
<a name="l01427"></a>01427                         <span class="keywordflow">return</span> iconv_strlen($string,$charset);
<a name="l01428"></a>01428                 } elseif ($charset == 'utf-8')  {
<a name="l01429"></a>01429                         <span class="keywordflow">return</span> $this-&gt;utf8_strlen($string);
<a name="l01430"></a>01430                 } elseif ($this-&gt;eucBasedSets[$charset])        {
<a name="l01431"></a>01431                         <span class="keywordflow">return</span> $this-&gt;euc_strlen($string,$charset);
<a name="l01432"></a>01432                 } elseif ($this-&gt;twoByteSets[$charset]) {
<a name="l01433"></a>01433                         <span class="keywordflow">return</span> strlen($string)/2;       <span class="comment">// byte length divided by the fixed character width of two bytes</span>
<a name="l01434"></a>01434                 } elseif ($this-&gt;fourByteSets[$charset])        {
<a name="l01435"></a>01435                         <span class="keywordflow">return</span> strlen($string)/4;       <span class="comment">// byte length divided by the fixed character width of four bytes</span>
<a name="l01436"></a>01436                 }
<a name="l01437"></a>01437                 <span class="comment">// treat everything else as single-byte encoding</span>
<a name="l01438"></a>01438                 <span class="keywordflow">return</span> strlen($string);
<a name="l01439"></a>01439         }
<a name="l01440"></a>01440 
<a name="l01453"></a><a class="code" href="classt3lib__cs.html#baaa8766ceae14506fa6724e8540c86d">01453</a>         function crop($charset,$string,$len,$crop='')   {       <span class="comment">// Crops $string to $len characters (kept from the start if $len is positive, from the end if negative) and puts $crop at the cut side; returns $string unchanged when nothing is cut</span>
<a name="l01454"></a>01454                 <span class="keywordflow">if</span> (intval($len) == 0)  <span class="keywordflow">return</span> $string;        <span class="comment">// a length of 0 means "do not crop"</span>
<a name="l01455"></a>01455 
<a name="l01456"></a>01456                 <span class="keywordflow">if</span> ($charset == 'utf-8')        {
<a name="l01457"></a>01457                         $i = $this-&gt;utf8_char2byte_pos($string,$len);
<a name="l01458"></a>01458                 } elseif ($this-&gt;eucBasedSets[$charset])        {
<a name="l01459"></a>01459                         $i = $this-&gt;euc_char2byte_pos($string,$len,$charset);
<a name="l01460"></a>01460                 } <span class="keywordflow">else</span> {        <span class="comment">// every other charset is handled as one byte per character here (twoByteSets/fourByteSets are not consulted, unlike in substr())</span>
<a name="l01461"></a>01461                         <span class="keywordflow">if</span> ($len &gt; 0)   {
<a name="l01462"></a>01462                                 $i = $len;
<a name="l01463"></a>01463                         } <span class="keywordflow">else</span> {
<a name="l01464"></a>01464                                 $i = strlen($string)+$len;
<a name="l01465"></a>01465                                 <span class="keywordflow">if</span> ($i&lt;=0)      $i = <span class="keyword">false</span>; <span class="comment">// the string is not longer than abs($len): nothing to crop</span>
<a name="l01466"></a>01466                         }
<a name="l01467"></a>01467                 }
<a name="l01468"></a>01468 
<a name="l01469"></a>01469                 <span class="keywordflow">if</span> ($i === <span class="keyword">false</span>)       {       <span class="comment">// $len outside actual string length</span>
<a name="l01470"></a>01470                         <span class="keywordflow">return</span> $string;
<a name="l01471"></a>01471                 } <span class="keywordflow">else</span>  {
<a name="l01472"></a>01472                         <span class="keywordflow">if</span> ($len &gt; 0)   {
<a name="l01473"></a>01473                                 <span class="keywordflow">if</span> (strlen($string{$i}))        {       <span class="comment">// only crop if a byte exists at offset $i, i.e. the string continues past the cut position</span>
<a name="l01474"></a>01474                                         <span class="keywordflow">return</span> substr($string,0,$i).$crop;
<a name="l01475"></a>01475 
<a name="l01476"></a>01476                                 }
<a name="l01477"></a>01477                         } <span class="keywordflow">else</span> {
<a name="l01478"></a>01478                                 <span class="keywordflow">if</span> (strlen($string{$i-1}))      {       <span class="comment">// only crop if a byte exists before offset $i, i.e. something is actually cut off at the front</span>
<a name="l01479"></a>01479                                         <span class="keywordflow">return</span> $crop.substr($string,$i);
<a name="l01480"></a>01480                                 }
<a name="l01481"></a>01481                         }
<a name="l01482"></a>01482 
<a name="l01483"></a>01483 <span class="comment">/*</span>
<a name="l01484"></a>01484 <span class="comment">                        if (abs($len)&lt;$this-&gt;strlen($charset,$string))  {       // Has to use -&gt;strlen() - otherwise multibyte strings ending with a multibyte char will return true here (which is not a catastrophe, but...)</span>
<a name="l01485"></a>01485 <span class="comment">                                if ($len &gt; 0)   {</span>
<a name="l01486"></a>01486 <span class="comment">                                        return substr($string,0,$i).$crop;</span>
<a name="l01487"></a>01487 <span class="comment">                                } else {</span>
<a name="l01488"></a>01488 <span class="comment">                                        return $crop.substr($string,$i);</span>
<a name="l01489"></a>01489 <span class="comment">                                }</span>
<a name="l01490"></a>01490 <span class="comment">                        }</span>
<a name="l01491"></a>01491 <span class="comment">*/</span>
<a name="l01492"></a>01492                 }
<a name="l01493"></a>01493                 <span class="keywordflow">return</span> $string;
<a name="l01494"></a>01494         }
<a name="l01495"></a>01495 
<a name="l01506"></a><a class="code" href="classt3lib__cs.html#356eb8d5573975163a9eeb76fdca38b9">01506</a>         function strtrunc($charset,$string,$len)        {       <span class="comment">// Truncates $string (encoded in $charset) to at most $len bytes without cutting through a multi-byte character; returns '' for $len &lt;= 0</span>
<a name="l01507"></a>01507                 <span class="keywordflow">if</span> ($len &lt;= 0)  <span class="keywordflow">return</span> '';
<a name="l01508"></a>01508 
<a name="l01509"></a>01509                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
<a name="l01510"></a>01510                         <span class="keywordflow">return</span> mb_strcut($string,0,$len,$charset);
<a name="l01511"></a>01511                 } elseif ($charset == 'utf-8')  {
<a name="l01512"></a>01512                         <span class="keywordflow">return</span> $this-&gt;utf8_strtrunc($string,$len);
<a name="l01513"></a>01513                 } elseif ($this-&gt;eucBasedSets[$charset])        {
<a name="l01514"></a>01514                         <span class="keywordflow">return</span> $this-&gt;euc_strtrunc($string,$len,$charset);      <span class="comment">// $len must be handed on, otherwise the requested length is lost; NOTE(review): assumes the ($str,$len,$charset) argument order - confirm against euc_strtrunc()</span>
<a name="l01515"></a>01515                 } elseif ($this-&gt;twoByteSets[$charset]) {
<a name="l01516"></a>01516                         <span class="keywordflow">if</span> ($len % 2)   $len--;         <span class="comment">// don't cut at odd positions</span>
<a name="l01517"></a>01517                 } elseif ($this-&gt;fourByteSets[$charset])        {
<a name="l01518"></a>01518                         $x = $len % 4;
<a name="l01519"></a>01519                         $len -= $x;     <span class="comment">// realign to position divisible by four</span>
<a name="l01520"></a>01520                 }
<a name="l01521"></a>01521                 <span class="comment">// treat everything else as single-byte encoding</span>
<a name="l01522"></a>01522                 <span class="keywordflow">return</span> substr($string,0,$len);
<a name="l01523"></a>01523         }
<a name="l01524"></a>01524 
<a name="l01540"></a><a class="code" href="classt3lib__cs.html#9eb6df13082ebca55b30a039ec9c2a64">01540</a>         function conv_case($charset,$string,$case)      {       <span class="comment">// Returns $string (encoded in $charset) with its case converted; $case 'toLower' gives lower case, and the mbstring branch treats every other value as upper case</span>
<a name="l01541"></a>01541                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring' &amp;&amp; (<span class="keywordtype">float</span>)phpversion() &gt;= 4.3)   {
<a name="l01542"></a>01542                         <span class="keywordflow">if</span> ($case == 'toLower') {
<a name="l01543"></a>01543                                 $string = mb_strtolower($string,$charset);
<a name="l01544"></a>01544                         } <span class="keywordflow">else</span> {
<a name="l01545"></a>01545                                 $string = mb_strtoupper($string,$charset);
<a name="l01546"></a>01546                         }
<a name="l01547"></a>01547                 } elseif ($charset == 'utf-8')  {
<a name="l01548"></a>01548                         $string = $this-&gt;utf8_char_mapping($string,'<span class="keywordflow">case</span>',$case);
<a name="l01549"></a>01549                 } elseif (isset($this-&gt;eucBasedSets[$charset])) {
<a name="l01550"></a>01550                         $string = $this-&gt;euc_char_mapping($string,$charset,'<span class="keywordflow">case</span>',$case);
<a name="l01551"></a>01551                 } <span class="keywordflow">else</span> {
<a name="l01552"></a>01552                                 <span class="comment">// treat everything else as single-byte encoding</span>
<a name="l01553"></a>01553                         $string = $this-&gt;sb_char_mapping($string,$charset,'<span class="keywordflow">case</span>',$case);
<a name="l01554"></a>01554                 }
<a name="l01555"></a>01555 
<a name="l01556"></a>01556                 <span class="keywordflow">return</span> $string;
<a name="l01557"></a>01557         }
<a name="l01558"></a>01558 
<a name="l01566"></a><a class="code" href="classt3lib__cs.html#a3eb390354ec2121ec912583416fefe7">01566</a>         function specCharsToASCII($charset,$string)     {
<a name="l01567"></a>01567                 <span class="keywordflow">if</span> ($charset == 'utf-8')        {
<a name="l01568"></a>01568                         $string = $this-&gt;utf8_char_mapping($string,'ascii');
<a name="l01569"></a>01569                 } elseif (isset($this-&gt;eucBasedSets[$charset])) {
<a name="l01570"></a>01570                         $string = $this-&gt;euc_char_mapping($string,$charset,'ascii');
<a name="l01571"></a>01571                 } <span class="keywordflow">else</span> {
<a name="l01572"></a>01572                                 <span class="comment">// treat everything else as single-byte encoding</span>
<a name="l01573"></a>01573                         $string = $this-&gt;sb_char_mapping($string,$charset,'ascii');
<a name="l01574"></a>01574                 }
<a name="l01575"></a>01575 
<a name="l01576"></a>01576                 <span class="keywordflow">return</span> $string;
<a name="l01577"></a>01577         }
<a name="l01578"></a>01578 
<a name="l01579"></a>01579 
<a name="l01580"></a>01580 
<a name="l01581"></a>01581 
<a name="l01582"></a>01582 
<a name="l01583"></a>01583 
<a name="l01584"></a>01584 
<a name="l01585"></a>01585 
<a name="l01586"></a>01586 
<a name="l01587"></a>01587 
<a name="l01588"></a>01588 
<a name="l01589"></a>01589 
<a name="l01590"></a>01590         <span class="comment">/********************************************</span>
<a name="l01591"></a>01591 <span class="comment">         *</span>
<a name="l01592"></a>01592 <span class="comment">         * Internal string operation functions</span>
<a name="l01593"></a>01593 <span class="comment">         *</span>
<a name="l01594"></a>01594 <span class="comment">         ********************************************/</span>
<a name="l01595"></a>01595 
<a name="l01606"></a><a class="code" href="classt3lib__cs.html#67e70dcda5973aef6b9f3740f658561f">01606</a>         function sb_char_mapping($str,$charset,$mode,$opt='')   {
<a name="l01607"></a>01607                 <span class="keywordflow">switch</span>($mode)   {
<a name="l01608"></a>01608                         <span class="keywordflow">case</span> '<span class="keywordflow">case</span>':
<a name="l01609"></a>01609                                 <span class="keywordflow">if</span> (!$this-&gt;initCaseFolding($charset))  <span class="keywordflow">return</span> $str;    <span class="comment">// do nothing</span>
<a name="l01610"></a>01610                                 $map =&amp; $this-&gt;caseFolding[$charset][$opt];
<a name="l01611"></a>01611                                 <span class="keywordflow">break</span>;
<a name="l01612"></a>01612 
<a name="l01613"></a>01613                         <span class="keywordflow">case</span> 'ascii':
<a name="l01614"></a>01614                                 <span class="keywordflow">if</span> (!$this-&gt;initToASCII($charset))      <span class="keywordflow">return</span> $str;    <span class="comment">// do nothing</span>
<a name="l01615"></a>01615                                 $map =&amp; $this-&gt;toASCII[$charset];
<a name="l01616"></a>01616                                 <span class="keywordflow">break</span>;
<a name="l01617"></a>01617 
<a name="l01618"></a>01618                         <span class="keywordflow">default</span>:
<a name="l01619"></a>01619                                 <span class="keywordflow">return</span> $str;
<a name="l01620"></a>01620                 }
<a name="l01621"></a>01621 
<a name="l01622"></a>01622                 $out = '';
<a name="l01623"></a>01623                 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++)       {
<a name="l01624"></a>01624                         $c = $str{$i};
<a name="l01625"></a>01625                         <span class="keywordflow">if</span> (isset($map[$c]))    {
<a name="l01626"></a>01626                                 $out .= $map[$c];
<a name="l01627"></a>01627                         } <span class="keywordflow">else</span> {
<a name="l01628"></a>01628                                 $out .= $c;
<a name="l01629"></a>01629                         }
<a name="l01630"></a>01630                 }
<a name="l01631"></a>01631 
<a name="l01632"></a>01632                 <span class="keywordflow">return</span> $out;
<a name="l01633"></a>01633         }
<a name="l01634"></a>01634 
<a name="l01635"></a>01635 
<a name="l01636"></a>01636 
<a name="l01637"></a>01637 
<a name="l01638"></a>01638 
<a name="l01639"></a>01639 
<a name="l01640"></a>01640 
<a name="l01641"></a>01641 
<a name="l01642"></a>01642 
<a name="l01643"></a>01643 
<a name="l01644"></a>01644         <span class="comment">/********************************************</span>
<a name="l01645"></a>01645 <span class="comment">         *</span>
<a name="l01646"></a>01646 <span class="comment">         * Internal UTF-8 string operation functions</span>
<a name="l01647"></a>01647 <span class="comment">         *</span>
<a name="l01648"></a>01648 <span class="comment">         ********************************************/</span>
<a name="l01649"></a>01649 
<a name="l01661"></a><a class="code" href="classt3lib__cs.html#3d99778bbc250f606478170343556072">01661</a>         function utf8_substr($str,$start,$len=null)     {
<a name="l01662"></a>01662                 <span class="keywordflow">if</span> (!strcmp($len,<span class="charliteral">'0'</span>))  <span class="keywordflow">return</span> '';
<a name="l01663"></a>01663 
<a name="l01664"></a>01664                 $byte_start = $this-&gt;utf8_char2byte_pos($str,$start);
<a name="l01665"></a>01665                 <span class="keywordflow">if</span> ($byte_start === <span class="keyword">false</span>)      {
<a name="l01666"></a>01666                         <span class="keywordflow">if</span> ($start &gt; 0) {
<a name="l01667"></a>01667                                 <span class="keywordflow">return</span> <span class="keyword">false</span>;   <span class="comment">// $start outside string length</span>
<a name="l01668"></a>01668                         } <span class="keywordflow">else</span> {
<a name="l01669"></a>01669                                 $start = 0;
<a name="l01670"></a>01670                         }
<a name="l01671"></a>01671                 }
<a name="l01672"></a>01672 
<a name="l01673"></a>01673                 $str = substr($str,$byte_start);
<a name="l01674"></a>01674 
<a name="l01675"></a>01675                 <span class="keywordflow">if</span> ($len!=null) {
<a name="l01676"></a>01676                         $byte_end = $this-&gt;utf8_char2byte_pos($str,$len);
<a name="l01677"></a>01677                         <span class="keywordflow">if</span> ($byte_end === <span class="keyword">false</span>)        <span class="comment">// $len outside actual string length</span>
<a name="l01678"></a>01678                                 <span class="keywordflow">return</span> $len&lt;0 ? '' : $str;      <span class="comment">// A negative length that exceeds the string yields an empty string; a positive one yields the whole remainder.</span>
<a name="l01679"></a>01679                         <span class="keywordflow">else</span>
<a name="l01680"></a>01680                                 <span class="keywordflow">return</span> substr($str,0,$byte_end);
<a name="l01681"></a>01681                 }
<a name="l01682"></a>01682                 <span class="keywordflow">else</span>    <span class="keywordflow">return</span> $str;
<a name="l01683"></a>01683         }
<a name="l01684"></a>01684 
<a name="l01694"></a><a class="code" href="classt3lib__cs.html#7962ae7eebe4d5d3d7664b75b8b30efe">01694</a>         function utf8_strlen($str)      {
<a name="l01695"></a>01695                 $n=0;
<a name="l01696"></a>01696                 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++)       {
<a name="l01697"></a>01697                         $c = ord($str{$i});
<a name="l01698"></a>01698                         <span class="keywordflow">if</span> (!($c &amp; 0x80))       <span class="comment">// single-byte (0xxxxxxx)</span>
<a name="l01699"></a>01699                                 $n++;
<a name="l01700"></a>01700                         elseif (($c &amp; 0xC0) == 0xC0)    <span class="comment">// multi-byte starting byte (11xxxxxx)</span>
<a name="l01701"></a>01701                                 $n++;
<a name="l01702"></a>01702                 }
<a name="l01703"></a>01703                 <span class="keywordflow">return</span> $n;
<a name="l01704"></a>01704         }
<a name="l01705"></a>01705 
<a name="l01715"></a><a class="code" href="classt3lib__cs.html#65720073d9310f352eb0787048c0e033">01715</a>         function utf8_strtrunc($str,$len)       {
<a name="l01716"></a>01716                 $i = $len-1;
<a name="l01717"></a>01717                 <span class="keywordflow">if</span> (ord($str{$i}) &amp; 0x80) { <span class="comment">// part of a multibyte sequence</span>
<a name="l01718"></a>01718                         <span class="keywordflow">for</span> (; $i&gt;0 &amp;&amp; !(ord($str{$i}) &amp; 0x40); $i--)   ;       <span class="comment">// find the first byte</span>
<a name="l01719"></a>01719                         <span class="keywordflow">if</span> ($i &lt;= 0)    <span class="keywordflow">return</span> ''; <span class="comment">// sanity check</span>
<a name="l01720"></a>01720                         <span class="keywordflow">for</span> ($bc=0, $mbs=ord($str{$i}); $mbs &amp; 0x80; $mbs = $mbs &lt;&lt; 1)  $bc++;  <span class="comment">// calculate number of bytes</span>
<a name="l01721"></a>01721                         <span class="keywordflow">if</span> ($bc+$i &gt; $len)      <span class="keywordflow">return</span> substr($str,0,$i);
<a name="l01722"></a>01722                         <span class="comment">// fallthru: multibyte char fits into length</span>
<a name="l01723"></a>01723                 }
<a name="l01724"></a>01724                 <span class="keywordflow">return</span> substr($str,0,$len);
<a name="l01725"></a>01725         }
<a name="l01726"></a>01726 
<a name="l01737"></a><a class="code" href="classt3lib__cs.html#61b51591560e3742472e705d9b60fc50">01737</a>         function utf8_strpos($haystack,$needle,$offset=0)       {
<a name="l01738"></a>01738                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
<a name="l01739"></a>01739                         <span class="keywordflow">return</span> mb_strpos($haystack,$needle,$offset,'utf-8');
<a name="l01740"></a>01740                 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv')      {
<a name="l01741"></a>01741                         <span class="keywordflow">return</span> iconv_strpos($haystack,$needle,$offset,'utf-8');
<a name="l01742"></a>01742                 }
<a name="l01743"></a>01743 
<a name="l01744"></a>01744                 $byte_offset = $this-&gt;utf8_char2byte_pos($haystack,$offset);
<a name="l01745"></a>01745                 <span class="keywordflow">if</span> ($byte_offset === <span class="keyword">false</span>)     <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span>
<a name="l01746"></a>01746 
<a name="l01747"></a>01747                 $byte_pos = strpos($haystack,$needle,$byte_offset);
<a name="l01748"></a>01748                 <span class="keywordflow">if</span> ($byte_pos === <span class="keyword">false</span>)        <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// needle not found</span>
<a name="l01749"></a>01749 
<a name="l01750"></a>01750                 <span class="keywordflow">return</span> $this-&gt;utf8_byte2char_pos($haystack,$byte_pos);
<a name="l01751"></a>01751         }
<a name="l01752"></a>01752 
<a name="l01762"></a><a class="code" href="classt3lib__cs.html#030d8d8b016c54cf229ec7e9b5f0d177">01762</a>         function utf8_strrpos($haystack,$needle)        {
<a name="l01763"></a>01763                 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') {
<a name="l01764"></a>01764                         <span class="keywordflow">return</span> mb_strrpos($haystack,$needle,'utf-8');
<a name="l01765"></a>01765                 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv')      {
<a name="l01766"></a>01766                         <span class="keywordflow">return</span> iconv_strrpos($haystack,$needle,'utf-8');
<a name="l01767"></a>01767                 }
<a name="l01768"></a>01768 
<a name="l01769"></a>01769                 $byte_pos = strrpos($haystack,$needle);
<a name="l01770"></a>01770                 <span class="keywordflow">if</span> ($byte_pos === <span class="keyword">false</span>)        <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// needle not found</span>
<a name="l01771"></a>01771 
<a name="l01772"></a>01772                 <span class="keywordflow">return</span> $this-&gt;utf8_byte2char_pos($haystack,$byte_pos);
<a name="l01773"></a>01773         }
<a name="l01774"></a>01774 
<a name="l01784"></a><a class="code" href="classt3lib__cs.html#9c53f7d3d136ece0860f8ac2b567c173">01784</a>         function utf8_char2byte_pos($str,$pos)  {
<a name="l01785"></a>01785                 $n = 0;                         <span class="comment">// number of characters found</span>
<a name="l01786"></a>01786                 $p = abs($pos);         <span class="comment">// number of characters wanted</span>
<a name="l01787"></a>01787 
<a name="l01788"></a>01788                 <span class="keywordflow">if</span> ($pos &gt;= 0)  {
<a name="l01789"></a>01789                         $i = 0;
<a name="l01790"></a>01790                         $d = 1;
<a name="l01791"></a>01791                 } <span class="keywordflow">else</span> {
<a name="l01792"></a>01792                         $i = strlen($str)-1;
<a name="l01793"></a>01793                         $d = -1;
<a name="l01794"></a>01794                 }
<a name="l01795"></a>01795 
<a name="l01796"></a>01796                 <span class="keywordflow">for</span>( ; strlen($str{$i}) &amp;&amp; $n&lt;$p; $i+=$d)       {
<a name="l01797"></a>01797                         $c = (<span class="keywordtype">int</span>)ord($str{$i});
<a name="l01798"></a>01798                         <span class="keywordflow">if</span> (!($c &amp; 0x80))       <span class="comment">// single-byte (0xxxxxxx)</span>
<a name="l01799"></a>01799                                 $n++;
<a name="l01800"></a>01800                         elseif (($c &amp; 0xC0) == 0xC0)    <span class="comment">// multi-byte starting byte (11xxxxxx)</span>
<a name="l01801"></a>01801                                 $n++;
<a name="l01802"></a>01802                 }
<a name="l01803"></a>01803                 <span class="keywordflow">if</span> (!strlen($str{$i}))  <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span>
<a name="l01804"></a>01804 
<a name="l01805"></a>01805                 <span class="keywordflow">if</span> ($pos &gt;= 0)  {
<a name="l01806"></a>01806                                 <span class="comment">// skip trailing multi-byte data bytes</span>
<a name="l01807"></a>01807                         <span class="keywordflow">while</span> ((ord($str{$i}) &amp; 0x80) &amp;&amp; !(ord($str{$i}) &amp; 0x40)) { $i++; }
<a name="l01808"></a>01808                 } <span class="keywordflow">else</span> {
<a name="l01809"></a>01809                                 <span class="comment">// correct offset</span>
<a name="l01810"></a>01810                         $i++;
<a name="l01811"></a>01811                 }
<a name="l01812"></a>01812 
<a name="l01813"></a>01813                 <span class="keywordflow">return</span> $i;
<a name="l01814"></a>01814         }
<a name="l01815"></a>01815 
<a name="l01825"></a><a class="code" href="classt3lib__cs.html#f4203ca19d10582e50283e97445869a8">01825</a>         function utf8_byte2char_pos($str,$pos)  {
<a name="l01826"></a>01826                 $n = 0; <span class="comment">// number of characters</span>
<a name="l01827"></a>01827                 <span class="keywordflow">for</span>($i=$pos; $i&gt;0; $i--)        {
<a name="l01828"></a>01828                         $c = (<span class="keywordtype">int</span>)ord($str{$i});
<a name="l01829"></a>01829                         <span class="keywordflow">if</span> (!($c &amp; 0x80))       <span class="comment">// single-byte (0xxxxxxx)</span>
<a name="l01830"></a>01830                                 $n++;
<a name="l01831"></a>01831                         elseif (($c &amp; 0xC0) == 0xC0)    <span class="comment">// multi-byte starting byte (11xxxxxx)</span>
<a name="l01832"></a>01832                                 $n++;
<a name="l01833"></a>01833                 }
<a name="l01834"></a>01834                 <span class="keywordflow">if</span> (!strlen($str{$i}))  <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span>
<a name="l01835"></a>01835 
<a name="l01836"></a>01836                 <span class="keywordflow">return</span> $n;
<a name="l01837"></a>01837         }
<a name="l01838"></a>01838 
<a name="l01848"></a><a class="code" href="classt3lib__cs.html#7c55e3bb2f0de48829aa84decb9f3921">01848</a>         function utf8_char_mapping($str,$mode,$opt='')  {
<a name="l01849"></a>01849                 <span class="keywordflow">if</span> (!$this-&gt;initUnicodeData($mode))     <span class="keywordflow">return</span> $str;    <span class="comment">// do nothing</span>
<a name="l01850"></a>01850 
<a name="l01851"></a>01851                 $out = '';
<a name="l01852"></a>01852                 <span class="keywordflow">switch</span>($mode)   {
<a name="l01853"></a>01853                         <span class="keywordflow">case</span> '<span class="keywordflow">case</span>':
<a name="l01854"></a>01854                                 $map =&amp; $this-&gt;caseFolding['utf-8'][$opt];
<a name="l01855"></a>01855                                 <span class="keywordflow">break</span>;
<a name="l01856"></a>01856 
<a name="l01857"></a>01857                         <span class="keywordflow">case</span> 'ascii':
<a name="l01858"></a>01858                                 $map =&amp; $this-&gt;toASCII['utf-8'];
<a name="l01859"></a>01859                                 <span class="keywordflow">break</span>;
<a name="l01860"></a>01860 
<a name="l01861"></a>01861                         <span class="keywordflow">default</span>:
<a name="l01862"></a>01862                                 <span class="keywordflow">return</span> $str;
<a name="l01863"></a>01863                 }
<a name="l01864"></a>01864 
<a name="l01865"></a>01865                 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++)       {
<a name="l01866"></a>01866                         $c = ord($str{$i});
<a name="l01867"></a>01867                         <span class="keywordflow">if</span> (!($c &amp; 0x80))       <span class="comment">// single-byte (0xxxxxxx)</span>
<a name="l01868"></a>01868                                 $mbc = $str{$i};
<a name="l01869"></a>01869                         elseif (($c &amp; 0xC0) == 0xC0)    {       <span class="comment">// multi-byte starting byte (11xxxxxx)</span>
<a name="l01870"></a>01870                                 <span class="keywordflow">for</span> ($bc=0; $c &amp; 0x80; $c = $c &lt;&lt; 1) { $bc++; } <span class="comment">// calculate number of bytes</span>
<a name="l01871"></a>01871                                 $mbc = substr($str,$i,$bc);
<a name="l01872"></a>01872                                 $i += $bc-1;
<a name="l01873"></a>01873                         }
<a name="l01874"></a>01874 
<a name="l01875"></a>01875                         <span class="keywordflow">if</span> (isset($map[$mbc]))  {
<a name="l01876"></a>01876                                 $out .= $map[$mbc];
<a name="l01877"></a>01877                         } <span class="keywordflow">else</span> {
<a name="l01878"></a>01878                                 $out .= $mbc;
<a name="l01879"></a>01879                         }
<a name="l01880"></a>01880                 }
<a name="l01881"></a>01881 
<a name="l01882"></a>01882                 <span class="keywordflow">return</span> $out;
<a name="l01883"></a>01883         }
<a name="l01884"></a>01884 
<a name="l01885"></a>01885 
<a name="l01886"></a>01886 
<a name="l01887"></a>01887 
<a name="l01888"></a>01888 
<a name="l01889"></a>01889 
<a name="l01890"></a>01890 
<a name="l01891"></a>01891 
<a name="l01892"></a>01892 
<a name="l01893"></a>01893 
<a name="l01894"></a>01894 
<a name="l01895"></a>01895 
<a name="l01896"></a>01896 
<a name="l01897"></a>01897 
<a name="l01898"></a>01898 
<a name="l01899"></a>01899 
<a name="l01900"></a>01900 
<a name="l01901"></a>01901 
<a name="l01902"></a>01902         <span class="comment">/********************************************</span>
<a name="l01903"></a>01903 <span class="comment">         *</span>
<a name="l01904"></a>01904 <span class="comment">         * Internal EUC string operation functions</span>
<a name="l01905"></a>01905 <span class="comment">         *</span>
<a name="l01906"></a>01906 <span class="comment">         * Extended Unix Code:</span>
<a name="l01907"></a>01907 <span class="comment">         *  ASCII-compatible 7-bit single-byte chars</span>
<a name="l01908"></a>01908 <span class="comment">         *  8-bit two-byte chars</span>
<a name="l01909"></a>01909 <span class="comment">         *</span>
<a name="l01910"></a>01910 <span class="comment">         * Shift-JIS is treated as a special case.</span>
<a name="l01911"></a>01911 <span class="comment">         *</span>
<a name="l01912"></a>01912 <span class="comment">         ********************************************/</span>
<a name="l01913"></a>01913 
<a name="l01924"></a><a class="code" href="classt3lib__cs.html#f3f6679f979e1445585ebb4d1228bd59">01924</a>         function euc_strtrunc($str,$len,$charset)        {
<a name="l01925"></a>01925                 $sjis = ($charset == 'shift_jis');
<a name="l01926"></a>01926                 <span class="keywordflow">for</span> ($i=0; strlen($str{$i}) &amp;&amp; $i&lt;$len; $i++) {
<a name="l01927"></a>01927                         $c = ord($str{$i});
<a name="l01928"></a>01928                         <span class="keywordflow">if</span> ($sjis)      {
<a name="l01929"></a>01929                                 <span class="keywordflow">if</span> (($c &gt;= 0x80 &amp;&amp; $c &lt; 0xA0) || ($c &gt;= 0xE0))  $i++;   <span class="comment">// advance a double-byte char</span>
<a name="l01930"></a>01930                         }
<a name="l01931"></a>01931                         <span class="keywordflow">else</span>    {
<a name="l01932"></a>01932                                 <span class="keywordflow">if</span> ($c &gt;= 0x80) $i++;   <span class="comment">// advance a double-byte char</span>
<a name="l01933"></a>01933                         }
<a name="l01934"></a>01934                 }
<a name="l01935"></a>01935                 <span class="keywordflow">if</span> (!strlen($str{$i}))  <span class="keywordflow">return</span> $str;    <span class="comment">// string shorter than supplied length</span>
<a name="l01936"></a>01936 
<a name="l01937"></a>01937                 <span class="keywordflow">if</span> ($i&gt;$len)
<a name="l01938"></a>01938                         <span class="keywordflow">return</span> substr($str,0,$len-1);   <span class="comment">// byte $len-1 is the lead byte of a double-byte char, so cut before it</span>
<a name="l01939"></a>01939                 <span class="keywordflow">else</span>
<a name="l01940"></a>01940                         <span class="keywordflow">return</span> substr($str,0,$len);
<a name="l01941"></a>01941         }
<a name="l01942"></a>01942 
<a name="l01953"></a><a class="code" href="classt3lib__cs.html#df152592ef0a039f8d6d2fee3791cc18">01953</a>         function euc_substr($str,$start,$charset,$len=null)     {
<a name="l01954"></a>01954                 $byte_start = $this-&gt;euc_char2byte_pos($str,$start,$charset);
<a name="l01955"></a>01955                 <span class="keywordflow">if</span> ($byte_start === <span class="keyword">false</span>)      <span class="keywordflow">return</span> <span class="keyword">false</span>;   <span class="comment">// $start outside string length</span>
<a name="l01956"></a>01956 
<a name="l01957"></a>01957                 $str = substr($str,$byte_start);
<a name="l01958"></a>01958 
<a name="l01959"></a>01959                 <span class="keywordflow">if</span> ($len!=null) {
<a name="l01960"></a>01960                         $byte_end = $this-&gt;euc_char2byte_pos($str,$len,$charset);
<a name="l01961"></a>01961                         <span class="keywordflow">if</span> ($byte_end === <span class="keyword">false</span>)        <span class="comment">// $len outside actual string length</span>
<a name="l01962"></a>01962                                 <span class="keywordflow">return</span> $str;
<a name="l01963"></a>01963                         <span class="keywordflow">else</span>
<a name="l01964"></a>01964                                 <span class="keywordflow">return</span> substr($str,0,$byte_end);
<a name="l01965"></a>01965                 }
<a name="l01966"></a>01966                 <span class="keywordflow">else</span>    <span class="keywordflow">return</span> $str;
<a name="l01967"></a>01967         }
<a name="l01968"></a>01968 
<a name="l01978"></a><a class="code" href="classt3lib__cs.html#3901ba21b7a765b9beb8ba094133559b">01978</a>         function euc_strlen($str,$charset)       {
<a name="l01979"></a>01979                 $sjis = ($charset == 'shift_jis');
<a name="l01980"></a>01980                 $n=0;
<a name="l01981"></a>01981                 <span class="keywordflow">for</span> ($i=0; strlen($str{$i}); $i++) {
<a name="l01982"></a>01982                         $c = ord($str{$i});
<a name="l01983"></a>01983                         <span class="keywordflow">if</span> ($sjis)      {
<a name="l01984"></a>01984                                 <span class="keywordflow">if</span> (($c &gt;= 0x80 &amp;&amp; $c &lt; 0xA0) || ($c &gt;= 0xE0))  $i++;   <span class="comment">// advance a double-byte char</span>
<a name="l01985"></a>01985                         }
<a name="l01986"></a>01986                         <span class="keywordflow">else</span>    {
<a name="l01987"></a>01987                                 <span class="keywordflow">if</span> ($c &gt;= 0x80) $i++;   <span class="comment">// advance a double-byte char</span>
<a name="l01988"></a>01988                         }
<a name="l01989"></a>01989 
<a name="l01990"></a>01990                         $n++;
<a name="l01991"></a>01991                 }
<a name="l01992"></a>01992 
<a name="l01993"></a>01993                 <span class="keywordflow">return</span> $n;
<a name="l01994"></a>01994         }
<a name="l01995"></a>01995 
<a name="l02005"></a><a class="code" href="classt3lib__cs.html#a8ea3e301a55d7e9a40d952d4b9e05dd">02005</a>         function euc_char2byte_pos($str,$pos,$charset)  {
<a name="l02006"></a>02006                 $sjis = ($charset == 'shift_jis');
<a name="l02007"></a>02007                 $n = 0; <span class="comment">// number of characters seen</span>
<a name="l02008"></a>02008                 $p = abs($pos); <span class="comment">// number of characters wanted</span>
<a name="l02009"></a>02009 
<a name="l02010"></a>02010                 <span class="keywordflow">if</span> ($pos &gt;= 0)  {
<a name="l02011"></a>02011                         $i = 0;
<a name="l02012"></a>02012                         $d = 1;
<a name="l02013"></a>02013                 } <span class="keywordflow">else</span> {
<a name="l02014"></a>02014                         $i = strlen($str)-1;
<a name="l02015"></a>02015                         $d = -1;
<a name="l02016"></a>02016                 }
<a name="l02017"></a>02017 
<a name="l02018"></a>02018                 <span class="keywordflow">for</span> ( ; strlen($str{$i}) &amp;&amp; $n&lt;$p; $i+=$d) {
<a name="l02019"></a>02019                         $c = ord($str{$i});
<a name="l02020"></a>02020                         <span class="keywordflow">if</span> ($sjis)      {
<a name="l02021"></a>02021                                 <span class="keywordflow">if</span> (($c &gt;= 0x80 &amp;&amp; $c &lt; 0xA0) || ($c &gt;= 0xE0))  $i+=$d; <span class="comment">// advance a double-byte char</span>
<a name="l02022"></a>02022                         }
<a name="l02023"></a>02023                         <span class="keywordflow">else</span>    {
<a name="l02024"></a>02024                                 <span class="keywordflow">if</span> ($c &gt;= 0x80) $i+=$d; <span class="comment">// advance a double-byte char</span>
<a name="l02025"></a>02025                         }
<a name="l02026"></a>02026 
<a name="l02027"></a>02027                         $n++;
<a name="l02028"></a>02028                 }
<a name="l02029"></a>02029                 <span class="keywordflow">if</span> (!strlen($str{$i}))  <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span>
<a name="l02030"></a>02030 
<a name="l02031"></a>02031                 <span class="keywordflow">if</span> ($pos &lt; 0)   $i++;   <span class="comment">// correct offset</span>
<a name="l02032"></a>02032 
<a name="l02033"></a>02033                 <span class="keywordflow">return</span> $i;
<a name="l02034"></a>02034         }
<a name="l02035"></a>02035 
<a name="l02046"></a><a class="code" href="classt3lib__cs.html#37a9c924fb11389373cbead3f6bbe5a0">02046</a>         function euc_char_mapping($str,$charset,$mode,$opt='')  {
<a name="l02047"></a>02047                 <span class="keywordflow">switch</span>($mode)   {
<a name="l02048"></a>02048                         <span class="keywordflow">case</span> '<span class="keywordflow">case</span>':
<a name="l02049"></a>02049                                 <span class="keywordflow">if</span> (!$this-&gt;initCaseFolding($charset))  <span class="keywordflow">return</span> $str;    <span class="comment">// do nothing</span>
<a name="l02050"></a>02050                                 $map =&amp; $this-&gt;caseFolding[$charset][$opt];
<a name="l02051"></a>02051                                 <span class="keywordflow">break</span>;
<a name="l02052"></a>02052 
<a name="l02053"></a>02053                         <span class="keywordflow">case</span> 'ascii':
<a name="l02054"></a>02054                                 <span class="keywordflow">if</span> (!$this-&gt;initToASCII($charset))      <span class="keywordflow">return</span> $str;    <span class="comment">// do nothing</span>
<a name="l02055"></a>02055                                 $map =&amp; $this-&gt;toASCII[$charset];
<a name="l02056"></a>02056                                 <span class="keywordflow">break</span>;
<a name="l02057"></a>02057 
<a name="l02058"></a>02058                         <span class="keywordflow">default</span>:
<a name="l02059"></a>02059                                 <span class="keywordflow">return</span> $str;
<a name="l02060"></a>02060                 }
<a name="l02061"></a>02061 
<a name="l02062"></a>02062                 $sjis = ($charset == 'shift_jis');
<a name="l02063"></a>02063                 $out = '';
<a name="l02064"></a>02064                 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++)       {
<a name="l02065"></a>02065                         $mbc = $str{$i};
<a name="l02066"></a>02066                         $c = ord($mbc);
<a name="l02067"></a>02067 
<a name="l02068"></a>02068                         <span class="keywordflow">if</span> ($sjis)      {
<a name="l02069"></a>02069                                 <span class="keywordflow">if</span> (($c &gt;= 0x80 &amp;&amp; $c &lt; 0xA0) || ($c &gt;= 0xE0))  {       <span class="comment">// a double-byte char</span>
<a name="l02070"></a>02070                                         $mbc = substr($str,$i,2);
<a name="l02071"></a>02071                                         $i++;
<a name="l02072"></a>02072                                 }
<a name="l02073"></a>02073                         }
<a name="l02074"></a>02074                         <span class="keywordflow">else</span>    {
<a name="l02075"></a>02075                                 <span class="keywordflow">if</span> ($c &gt;= 0x80) {       <span class="comment">// a double-byte char</span>
<a name="l02076"></a>02076                                         $mbc = substr($str,$i,2);
<a name="l02077"></a>02077                                         $i++;
<a name="l02078"></a>02078                                 }
<a name="l02079"></a>02079                         }
<a name="l02080"></a>02080 
<a name="l02081"></a>02081                         <span class="keywordflow">if</span> (isset($map[$mbc]))  {
<a name="l02082"></a>02082                                 $out .= $map[$mbc];
<a name="l02083"></a>02083                         } <span class="keywordflow">else</span> {
<a name="l02084"></a>02084                                 $out .= $mbc;
<a name="l02085"></a>02085                         }
<a name="l02086"></a>02086                 }
<a name="l02087"></a>02087 
<a name="l02088"></a>02088                 <span class="keywordflow">return</span> $out;
<a name="l02089"></a>02089         }
<a name="l02090"></a>02090 
<a name="l02091"></a>02091 }
<a name="l02092"></a>02092 
<a name="l02093"></a>02093 <span class="keywordflow">if</span> (defined('TYPO3_MODE') &amp;&amp; $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.<a class="code" href="classt3lib__cs.html">t3lib_cs</a>.php'])        {
<a name="l02094"></a>02094         include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.<a class="code" href="classt3lib__cs.html">t3lib_cs</a>.php']);
<a name="l02095"></a>02095 }
<a name="l02096"></a>02096 ?&gt;
</pre></div><?php
  // End of the rendered listing. Make sure the shared documentation helpers
  // are loaded; include_once does not load the file again if the identical
  // include_once at the top of this page already did.
  include_once('../doc-typo3-funcs.php');

  // get_footer() is not defined in this file -- presumably it lives in
  // doc-typo3-funcs.php next to get_header() (TODO confirm).
  get_footer();
?>