<?php include_once '../doc-typo3-funcs.php'; $doxygen_vars = array( "title" => "TYPO3 4.0.1: typo3_src-4.0.1/t3lib/class.t3lib_cs.php Source File", "datetime" => "Sat Dec 2 19:22:17 2006", "date" => "2 Dec 2006", "doxygenversion" => "1.4.6", "projectname" => "TYPO3 4.0.1", "projectnumber" => "4.0.1" ); get_header($doxygen_vars); ?> <!-- Generated by Doxygen 1.4.6 --> <div class="tabs"> <ul> <li><a href="main.html"><span>Main Page</span></a></li> <li><a href="namespaces.html"><span>Namespaces</span></a></li> <li><a href="classes.html"><span>Classes</span></a></li> <li id="current"><a href="files.html"><span>Files</span></a></li> <li><a href="dirs.html"><span>Directories</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="examples.html"><span>Examples</span></a></li> <li> <form action="search.php" method="get"> <table cellspacing="0" cellpadding="0" border="0"> <tr> <td><label> <u>S</u>earch for </label></td> <td><input type="text" name="query" value="" size="20" accesskey="s"/></td> </tr> </table> </form> </li> </ul></div> <div class="nav"> <a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a> » <a class="el" href="dir_9d0e5c424a38b69aeeedc616a9634e5f.html">t3lib</a></div> <h1>class.t3lib_cs.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <?php <a name="l00002"></a>00002 <span class="comment">/***************************************************************</span> <a name="l00003"></a>00003 <span class="comment">* Copyright notice</span> <a name="l00004"></a>00004 <span class="comment">*</span> <a name="l00005"></a>00005 <span class="comment">* (c) 2003-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)</span> <a name="l00006"></a>00006 <span class="comment">* All rights reserved</span> <a name="l00007"></a>00007 <span class="comment">*</span> <a name="l00008"></a>00008 <span class="comment">* This script is part of the Typo3 project. 
The Typo3 project is</span> <a name="l00009"></a>00009 <span class="comment">* free software; you can redistribute it and/or modify</span> <a name="l00010"></a>00010 <span class="comment">* it under the terms of the GNU General Public License as published by</span> <a name="l00011"></a>00011 <span class="comment">* the Free Software Foundation; either version 2 of the License, or</span> <a name="l00012"></a>00012 <span class="comment">* (at your option) any later version.</span> <a name="l00013"></a>00013 <span class="comment">*</span> <a name="l00014"></a>00014 <span class="comment">* The GNU General Public License can be found at</span> <a name="l00015"></a>00015 <span class="comment">* http://www.gnu.org/copyleft/gpl.html.</span> <a name="l00016"></a>00016 <span class="comment">*</span> <a name="l00017"></a>00017 <span class="comment">* This script is distributed in the hope that it will be useful,</span> <a name="l00018"></a>00018 <span class="comment">* but WITHOUT ANY WARRANTY; without even the implied warranty of</span> <a name="l00019"></a>00019 <span class="comment">* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the</span> <a name="l00020"></a>00020 <span class="comment">* GNU General Public License for more details.</span> <a name="l00021"></a>00021 <span class="comment">*</span> <a name="l00022"></a>00022 <span class="comment">* This copyright notice MUST APPEAR in all copies of the script!</span> <a name="l00023"></a>00023 <span class="comment">***************************************************************/</span> <a name="l00136"></a><a class="code" href="classt3lib__cs.html">00136</a> <span class="keyword">class </span><a class="code" href="classt3lib__cs.html">t3lib_cs</a> { <a name="l00137"></a><a class="code" href="classt3lib__cs.html#9ab316dd87b5b365f857eb85f4a0a9df">00137</a> var <a class="code" href="classt3lib__cs.html#9ab316dd87b5b365f857eb85f4a0a9df">$noCharByteVal</a>=63; <span class="comment">// ASCII Value for chars with no equivalent.</span> <a name="l00138"></a>00138 <a name="l00139"></a>00139 <span class="comment">// This is the array where parsed conversion tables are stored (cached)</span> <a name="l00140"></a><a class="code" href="classt3lib__cs.html#4ebc1d87207362e257e0faf256f8d0b6">00140</a> var <a class="code" href="classt3lib__cs.html#4ebc1d87207362e257e0faf256f8d0b6">$parsedCharsets</a>=array(); <a name="l00141"></a>00141 <a name="l00142"></a>00142 <span class="comment">// An array where case folding data will be stored (cached)</span> <a name="l00143"></a><a class="code" href="classt3lib__cs.html#9912264b38a9ea1f6bd8165e9990b9ce">00143</a> var <a class="code" href="classt3lib__cs.html#9912264b38a9ea1f6bd8165e9990b9ce">$caseFolding</a>=array(); <a name="l00144"></a>00144 <a name="l00145"></a>00145 <span class="comment">// An array where charset-to-ASCII mappings are stored (cached)</span> <a name="l00146"></a><a class="code" href="classt3lib__cs.html#cbc9aa09194a1f907d36d85912802a45">00146</a> var <a class="code" href="classt3lib__cs.html#cbc9aa09194a1f907d36d85912802a45">$toASCII</a>=array(); <a name="l00147"></a>00147 <a 
name="l00148"></a>00148 <span class="comment">// This tells the converter which charsets has two bytes per char:</span> <a name="l00149"></a><a class="code" href="classt3lib__cs.html#6f5a9f0242a1c7c7e6d2606ded10a2e6">00149</a> var <a class="code" href="classt3lib__cs.html#6f5a9f0242a1c7c7e6d2606ded10a2e6">$twoByteSets</a>=array( <a name="l00150"></a>00150 'ucs-2'=>1, <span class="comment">// 2-byte Unicode</span> <a name="l00151"></a>00151 ); <a name="l00152"></a>00152 <a name="l00153"></a>00153 <span class="comment">// This tells the converter which charsets has four bytes per char:</span> <a name="l00154"></a><a class="code" href="classt3lib__cs.html#f03f4dc2397537781d7bd003b172fed6">00154</a> var <a class="code" href="classt3lib__cs.html#f03f4dc2397537781d7bd003b172fed6">$fourByteSets</a>=array( <a name="l00155"></a>00155 'ucs-4'=>1, <span class="comment">// 4-byte Unicode</span> <a name="l00156"></a>00156 'utf-32'=>1, <span class="comment">// 4-byte Unicode (limited to the 21-bits of UTF-16)</span> <a name="l00157"></a>00157 ); <a name="l00158"></a>00158 <a name="l00159"></a>00159 <span class="comment">// This tells the converter which charsets use a scheme like the Extended Unix Code:</span> <a name="l00160"></a><a class="code" href="classt3lib__cs.html#7de65ab48a3a93ebaf8ee18b47e49287">00160</a> var <a class="code" href="classt3lib__cs.html#7de65ab48a3a93ebaf8ee18b47e49287">$eucBasedSets</a>=array( <a name="l00161"></a>00161 'gb2312'=>1, <span class="comment">// Chinese, simplified.</span> <a name="l00162"></a>00162 'big5'=>1, <span class="comment">// Chinese, traditional.</span> <a name="l00163"></a>00163 'euc-kr'=>1, <span class="comment">// Korean</span> <a name="l00164"></a>00164 'shift_jis'=>1, <span class="comment">// Japanese - WARNING: Shift-JIS includes half-width katakana single-bytes characters above 0x80!</span> <a name="l00165"></a>00165 ); <a name="l00166"></a>00166 <a name="l00167"></a>00167 <span class="comment">// see 
http://developer.apple.com/documentation/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.b0.html</span> <a name="l00168"></a>00168 <span class="comment">// http://czyborra.com/charsets/iso8859.html</span> <a name="l00169"></a><a class="code" href="classt3lib__cs.html#9bb5a04852c0887183eff86104014a1f">00169</a> var <a class="code" href="classt3lib__cs.html#9bb5a04852c0887183eff86104014a1f">$synonyms</a>=array( <a name="l00170"></a>00170 'us' => 'ascii', <a name="l00171"></a>00171 'us-ascii'=> 'ascii', <a name="l00172"></a>00172 'cp819' => 'iso-8859-1', <a name="l00173"></a>00173 'ibm819' => 'iso-8859-1', <a name="l00174"></a>00174 'iso-ir-100' => 'iso-8859-1', <a name="l00175"></a>00175 'iso-ir-109' => 'iso-8859-2', <a name="l00176"></a>00176 'iso-ir-148' => 'iso-8859-9', <a name="l00177"></a>00177 'iso-ir-199' => 'iso-8859-14', <a name="l00178"></a>00178 'iso-ir-203' => 'iso-8859-15', <a name="l00179"></a>00179 'csisolatin1' => 'iso-8859-1', <a name="l00180"></a>00180 'csisolatin2' => 'iso-8859-2', <a name="l00181"></a>00181 'csisolatin3' => 'iso-8859-3', <a name="l00182"></a>00182 'csisolatin5' => 'iso-8859-9', <a name="l00183"></a>00183 'csisolatin8' => 'iso-8859-14', <a name="l00184"></a>00184 'csisolatin9' => 'iso-8859-15', <a name="l00185"></a>00185 'csisolatingreek' => 'iso-8859-7', <a name="l00186"></a>00186 'iso-celtic' => 'iso-8859-14', <a name="l00187"></a>00187 'latin1' => 'iso-8859-1', <a name="l00188"></a>00188 'latin2' => 'iso-8859-2', <a name="l00189"></a>00189 'latin3' => 'iso-8859-3', <a name="l00190"></a>00190 'latin5' => 'iso-8859-9', <a name="l00191"></a>00191 'latin6' => 'iso-8859-10', <a name="l00192"></a>00192 'latin8' => 'iso-8859-14', <a name="l00193"></a>00193 'latin9' => 'iso-8859-15', <a name="l00194"></a>00194 'l1' => 'iso-8859-1', <a name="l00195"></a>00195 'l2' => 'iso-8859-2', <a name="l00196"></a>00196 'l3' => 'iso-8859-3', <a name="l00197"></a>00197 'l5' => 'iso-8859-9', <a name="l00198"></a>00198 'l6' => 
'iso-8859-10', <a name="l00199"></a>00199 'l8' => 'iso-8859-14', <a name="l00200"></a>00200 'l9' => 'iso-8859-15', <a name="l00201"></a>00201 'cyrillic' => 'iso-8859-5', <a name="l00202"></a>00202 'arabic' => 'iso-8859-6', <a name="l00203"></a>00203 'tis-620' => 'iso-8859-11', <a name="l00204"></a>00204 'win874' => 'windows-874', <a name="l00205"></a>00205 'win1250' => 'windows-1250', <a name="l00206"></a>00206 'win1251' => 'windows-1251', <a name="l00207"></a>00207 'win1252' => 'windows-1252', <a name="l00208"></a>00208 'win1253' => 'windows-1253', <a name="l00209"></a>00209 'win1254' => 'windows-1254', <a name="l00210"></a>00210 'win1255' => 'windows-1255', <a name="l00211"></a>00211 'win1256' => 'windows-1256', <a name="l00212"></a>00212 'win1257' => 'windows-1257', <a name="l00213"></a>00213 'win1258' => 'windows-1258', <a name="l00214"></a>00214 'cp1250' => 'windows-1250', <a name="l00215"></a>00215 'cp1251' => 'windows-1251', <a name="l00216"></a>00216 'cp1252' => 'windows-1252', <a name="l00217"></a>00217 'ms-ee' => 'windows-1250', <a name="l00218"></a>00218 'ms-ansi' => 'windows-1252', <a name="l00219"></a>00219 'ms-greek' => 'windows-1253', <a name="l00220"></a>00220 'ms-turk' => 'windows-1254', <a name="l00221"></a>00221 'winbaltrim' => 'windows-1257', <a name="l00222"></a>00222 'koi-8ru' => 'koi-8r', <a name="l00223"></a>00223 'koi8r' => 'koi-8r', <a name="l00224"></a>00224 'cp878' => 'koi-8r', <a name="l00225"></a>00225 'mac' => 'macroman', <a name="l00226"></a>00226 'macintosh' => 'macroman', <a name="l00227"></a>00227 'euc-cn' => 'gb2312', <a name="l00228"></a>00228 'x-euc-cn' => 'gb2312', <a name="l00229"></a>00229 'euccn' => 'gb2312', <a name="l00230"></a>00230 'cp936' => 'gb2312', <a name="l00231"></a>00231 'big-5' => 'big5', <a name="l00232"></a>00232 'cp950' => 'big5', <a name="l00233"></a>00233 'eucjp' => 'euc-jp', <a name="l00234"></a>00234 'sjis' => 'shift_jis', <a name="l00235"></a>00235 'shift-jis' => 'shift_jis', <a name="l00236"></a>00236 
'cp932' => 'shift_jis', <a name="l00237"></a>00237 'cp949' => 'euc-kr', <a name="l00238"></a>00238 'utf7' => 'utf-7', <a name="l00239"></a>00239 'utf8' => 'utf-8', <a name="l00240"></a>00240 'utf16' => 'utf-16', <a name="l00241"></a>00241 'utf32' => 'utf-32', <a name="l00242"></a>00242 'utf8' => 'utf-8', <a name="l00243"></a>00243 'ucs2' => 'ucs-2', <a name="l00244"></a>00244 'ucs4' => 'ucs-4', <a name="l00245"></a>00245 ); <a name="l00246"></a>00246 <a name="l00247"></a>00247 <span class="comment">// mapping of iso-639:2 language codes to script names</span> <a name="l00248"></a><a class="code" href="classt3lib__cs.html#21fc1d00a7274bdd25e7bbcbf2817c3b">00248</a> var <a class="code" href="classt3lib__cs.html#21fc1d00a7274bdd25e7bbcbf2817c3b">$lang_to_script</a>=array( <a name="l00249"></a>00249 <span class="comment">// iso-639:2 language codes, see:</span> <a name="l00250"></a>00250 <span class="comment">// http://www.w3.org/WAI/ER/IG/ert/iso639.htm</span> <a name="l00251"></a>00251 <span class="comment">// http://www.loc.gov/standards/iso639-2/langcodes.html</span> <a name="l00252"></a>00252 <span class="comment">// http://www.unicode.org/onlinedat/languages.html</span> <a name="l00253"></a>00253 'ar' => 'arabic', <a name="l00254"></a>00254 'bg' => 'cyrillic', <span class="comment">// Bulgarian</span> <a name="l00255"></a>00255 'bs' => 'east_european', <span class="comment">// Bosnian</span> <a name="l00256"></a>00256 'cs' => 'east_european', <span class="comment">// Czech</span> <a name="l00257"></a>00257 'da' => 'west_european', <span class="comment">// Danish</span> <a name="l00258"></a>00258 'de' => 'west_european', <span class="comment">// German</span> <a name="l00259"></a>00259 'es' => 'west_european', <span class="comment">// Spanish</span> <a name="l00260"></a>00260 'et' => 'estonian', <a name="l00261"></a>00261 'eo' => 'unicode', <span class="comment">// Esperanto</span> <a name="l00262"></a>00262 'eu' => 'west_european', <span class="comment">// 
Basque</span> <a name="l00263"></a>00263 'fa' => 'arabic', <span class="comment">// Persian</span> <a name="l00264"></a>00264 'fi' => 'west_european', <span class="comment">// Finish</span> <a name="l00265"></a>00265 'fo' => 'west_european', <span class="comment">// Faroese</span> <a name="l00266"></a>00266 'fr' => 'west_european', <span class="comment">// French</span> <a name="l00267"></a>00267 'gr' => 'greek', <a name="l00268"></a>00268 'he' => 'hebrew', <span class="comment">// Hebrew (since 1998)</span> <a name="l00269"></a>00269 'hi' => 'unicode', <span class="comment">// Hindi</span> <a name="l00270"></a>00270 'hr' => 'east_european', <span class="comment">// Croatian</span> <a name="l00271"></a>00271 'hu' => 'east_european', <span class="comment">// Hungarian</span> <a name="l00272"></a>00272 'iw' => 'hebrew', <span class="comment">// Hebrew (til 1998)</span> <a name="l00273"></a>00273 'is' => 'west_european', <span class="comment">// Icelandic</span> <a name="l00274"></a>00274 'it' => 'west_european', <span class="comment">// Italian</span> <a name="l00275"></a>00275 'ja' => 'japanese', <a name="l00276"></a>00276 'kl' => 'west_european', <span class="comment">// Greenlandic</span> <a name="l00277"></a>00277 'ko' => 'korean', <a name="l00278"></a>00278 'lt' => 'lithuanian', <a name="l00279"></a>00279 'lv' => 'west_european', <span class="comment">// Latvian/Lettish</span> <a name="l00280"></a>00280 'nl' => 'west_european', <span class="comment">// Dutch</span> <a name="l00281"></a>00281 'no' => 'west_european', <span class="comment">// Norwegian</span> <a name="l00282"></a>00282 'pl' => 'east_european', <span class="comment">// Polish</span> <a name="l00283"></a>00283 'pt' => 'west_european', <span class="comment">// Portuguese</span> <a name="l00284"></a>00284 'ro' => 'east_european', <span class="comment">// Romanian</span> <a name="l00285"></a>00285 'ru' => 'cyrillic', <span class="comment">// Russian</span> <a name="l00286"></a>00286 'sk' => 
'east_european', <span class="comment">// Slovak</span> <a name="l00287"></a>00287 'sl' => 'east_european', <span class="comment">// Slovenian</span> <a name="l00288"></a>00288 'sr' => 'cyrillic', <span class="comment">// Serbian</span> <a name="l00289"></a>00289 'sv' => 'west_european', <span class="comment">// Swedish</span> <a name="l00290"></a>00290 'th' => 'thai', <a name="l00291"></a>00291 'uk' => 'cyrillic', <span class="comment">// Ukranian</span> <a name="l00292"></a>00292 'vi' => 'vietnamese', <a name="l00293"></a>00293 'zh' => 'chinese', <a name="l00294"></a>00294 <span class="comment">// MS language codes, see http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclib/html/_crt_language_strings.asp</span> <a name="l00295"></a>00295 <span class="comment">// http://msdn.microsoft.com/library/default.asp?url=/library/en-us/wceinternational5/html/wce50conLanguageIdentifiersandLocales.asp</span> <a name="l00296"></a>00296 'ara' => 'arabic', <a name="l00297"></a>00297 'bgr' => 'cyrillic', <span class="comment">// Bulgarian</span> <a name="l00298"></a>00298 'cat' => 'west_european', <span class="comment">// Catalan</span> <a name="l00299"></a>00299 'chs' => 'simpl_chinese', <a name="l00300"></a>00300 'cht' => 'trad_chinese', <a name="l00301"></a>00301 'csy' => 'east_european', <span class="comment">// Czech</span> <a name="l00302"></a>00302 'dan' => 'west_european', <span class="comment">// Danisch</span> <a name="l00303"></a>00303 'deu' => 'west_european', <span class="comment">// German</span> <a name="l00304"></a>00304 'dea' => 'west_european', <span class="comment">// German (Austrian)</span> <a name="l00305"></a>00305 'des' => 'west_european', <span class="comment">// German (Swiss)</span> <a name="l00306"></a>00306 'ena' => 'west_european', <span class="comment">// English (Australian)</span> <a name="l00307"></a>00307 'enc' => 'west_european', <span class="comment">// English (Canadian)</span> <a name="l00308"></a>00308 'eng' => 
'west_european', <span class="comment">// English</span> <a name="l00309"></a>00309 'enz' => 'west_european', <span class="comment">// English (New Zealand)</span> <a name="l00310"></a>00310 'enu' => 'west_european', <span class="comment">// English (United States)</span> <a name="l00311"></a>00311 'euq' => 'west_european', <span class="comment">// Basque</span> <a name="l00312"></a>00312 'fos' => 'west_european', <span class="comment">// Faroese</span> <a name="l00313"></a>00313 'far' => 'arabic', <span class="comment">// Persian</span> <a name="l00314"></a>00314 'fin' => 'west_european', <span class="comment">// Finish</span> <a name="l00315"></a>00315 'fra' => 'west_european', <span class="comment">// French</span> <a name="l00316"></a>00316 'frb' => 'west_european', <span class="comment">// French (Belgian)</span> <a name="l00317"></a>00317 'frc' => 'west_european', <span class="comment">// French (Canadian)</span> <a name="l00318"></a>00318 'frs' => 'west_european', <span class="comment">// French (Swiss)</span> <a name="l00319"></a>00319 'ell' => 'greek', <a name="l00320"></a>00320 'heb' => 'hebrew', <a name="l00321"></a>00321 'hin' => 'unicode', <span class="comment">// Hindi</span> <a name="l00322"></a>00322 'hun' => 'east_european', <span class="comment">// Hungarian</span> <a name="l00323"></a>00323 'isl' => 'west_euorpean', <span class="comment">// Icelandic</span> <a name="l00324"></a>00324 'ita' => 'west_european', <span class="comment">// Italian</span> <a name="l00325"></a>00325 'its' => 'west_european', <span class="comment">// Italian (Swiss)</span> <a name="l00326"></a>00326 'jpn' => 'japanese', <a name="l00327"></a>00327 'kor' => 'korean', <a name="l00328"></a>00328 'lth' => 'lithuanian', <a name="l00329"></a>00329 'lvi' => 'west_european', <span class="comment">// Latvian/Lettish</span> <a name="l00330"></a>00330 'msl' => 'west_european', <span class="comment">// Malay</span> <a name="l00331"></a>00331 'nlb' => 'west_european', <span 
class="comment">// Dutch (Belgian)</span> <a name="l00332"></a>00332 'nld' => 'west_european', <span class="comment">// Dutch</span> <a name="l00333"></a>00333 'nor' => 'west_european', <span class="comment">// Norwegian (bokmal)</span> <a name="l00334"></a>00334 'non' => 'west_european', <span class="comment">// Norwegian (nynorsk)</span> <a name="l00335"></a>00335 'plk' => 'east_european', <span class="comment">// Polish</span> <a name="l00336"></a>00336 'ptg' => 'west_european', <span class="comment">// Portuguese</span> <a name="l00337"></a>00337 'ptb' => 'west_european', <span class="comment">// Portuguese (Brazil)</span> <a name="l00338"></a>00338 'rom' => 'east_european', <span class="comment">// Romanian</span> <a name="l00339"></a>00339 'rus' => 'cyrillic', <span class="comment">// Russian</span> <a name="l00340"></a>00340 'slv' => 'east_european', <span class="comment">// Slovenian</span> <a name="l00341"></a>00341 'sky' => 'east_european', <span class="comment">// Slovak</span> <a name="l00342"></a>00342 'srl' => 'east_european', <span class="comment">// Serbian (Latin)</span> <a name="l00343"></a>00343 'srb' => 'cyrillic', <span class="comment">// Serbian (Cyrillic)</span> <a name="l00344"></a>00344 'esp' => 'west_european', <span class="comment">// Spanish (trad. sort)</span> <a name="l00345"></a>00345 'esm' => 'west_european', <span class="comment">// Spanish (Mexican)</span> <a name="l00346"></a>00346 'esn' => 'west_european', <span class="comment">// Spanish (internat. 
sort)</span> <a name="l00347"></a>00347 'sve' => 'west_european', <span class="comment">// Swedish</span> <a name="l00348"></a>00348 'tha' => 'thai', <a name="l00349"></a>00349 'trk' => 'turkish', <a name="l00350"></a>00350 'ukr' => 'cyrillic', <span class="comment">// Ukrainian</span> <a name="l00351"></a>00351 <span class="comment">// English language names</span> <a name="l00352"></a>00352 'arabic' => 'arabic', <a name="l00353"></a>00353 'basque' => 'west_european', <a name="l00354"></a>00354 'bosnian' => 'east_european', <a name="l00355"></a>00355 'bulgarian' => 'east_european', <a name="l00356"></a>00356 'catalan' => 'west_european', <a name="l00357"></a>00357 'croatian' => 'east_european', <a name="l00358"></a>00358 'czech' => 'east_european', <a name="l00359"></a>00359 'danish' => 'west_european', <a name="l00360"></a>00360 'dutch' => 'west_european', <a name="l00361"></a>00361 'english' => 'west_european', <a name="l00362"></a>00362 'esperanto' => 'unicode', <a name="l00363"></a>00363 'estonian' => 'estonian', <a name="l00364"></a>00364 'faroese' => 'west_european', <a name="l00365"></a>00365 'farsi' => 'arabic', <a name="l00366"></a>00366 'finnish' => 'west_european', <a name="l00367"></a>00367 'french' => 'west_european', <a name="l00368"></a>00368 'galician' => 'west_european', <a name="l00369"></a>00369 'german' => 'west_european', <a name="l00370"></a>00370 'greek' => 'greek', <a name="l00371"></a>00371 'greenlandic' => 'west_european', <a name="l00372"></a>00372 'hebrew' => 'hebrew', <a name="l00373"></a>00373 'hindi' => 'unicode', <a name="l00374"></a>00374 'hungarian' => 'east_european', <a name="l00375"></a>00375 'icelandic' => 'west_european', <a name="l00376"></a>00376 'italian' => 'west_european', <a name="l00377"></a>00377 'latvian' => 'west_european', <a name="l00378"></a>00378 'lettish' => 'west_european', <a name="l00379"></a>00379 'lithuanian' => 'lithuanian', <a name="l00380"></a>00380 'malay' => 'west_european', <a name="l00381"></a>00381 
'norwegian' => 'west_european', <a name="l00382"></a>00382 'persian' => 'arabic', <a name="l00383"></a>00383 'polish' => 'east_european', <a name="l00384"></a>00384 'portuguese' => 'west_european', <a name="l00385"></a>00385 'russian' => 'cyrillic', <a name="l00386"></a>00386 'romanian' => 'east_european', <a name="l00387"></a>00387 'serbian' => 'cyrillic', <a name="l00388"></a>00388 'slovak' => 'east_european', <a name="l00389"></a>00389 'slovenian' => 'east_european', <a name="l00390"></a>00390 'spanish' => 'west_european', <a name="l00391"></a>00391 'svedish' => 'west_european', <a name="l00392"></a>00392 'that' => 'thai', <a name="l00393"></a>00393 'turkish' => 'turkish', <a name="l00394"></a>00394 'ukrainian' => 'cyrillic', <a name="l00395"></a>00395 ); <a name="l00396"></a>00396 <a name="l00397"></a>00397 <span class="comment">// mapping of language (family) names to charsets on Unix</span> <a name="l00398"></a><a class="code" href="classt3lib__cs.html#c124a372529679f320a01ac2e9643f90">00398</a> var <a class="code" href="classt3lib__cs.html#c124a372529679f320a01ac2e9643f90">$script_to_charset_unix</a>=array( <a name="l00399"></a>00399 'west_european' => 'iso-8859-1', <a name="l00400"></a>00400 'estonian' => 'iso-8859-1', <a name="l00401"></a>00401 'east_european' => 'iso-8859-2', <a name="l00402"></a>00402 'baltic' => 'iso-8859-4', <a name="l00403"></a>00403 'cyrillic' => 'iso-8859-5', <a name="l00404"></a>00404 'arabic' => 'iso-8859-6', <a name="l00405"></a>00405 'greek' => 'iso-8859-7', <a name="l00406"></a>00406 'hebrew' => 'iso-8859-8', <a name="l00407"></a>00407 'turkish' => 'iso-8859-9', <a name="l00408"></a>00408 'thai' => 'iso-8859-11', <span class="comment">// = TIS-620</span> <a name="l00409"></a>00409 'lithuanian' => 'iso-8859-13', <a name="l00410"></a>00410 'chinese' => 'gb2312', <span class="comment">// = euc-cn</span> <a name="l00411"></a>00411 'japanese' => 'euc-jp', <a name="l00412"></a>00412 'korean' => 'euc-kr', <a name="l00413"></a>00413 
'simpl_chinese' => 'gb2312', <a name="l00414"></a>00414 'trad_chinese' => 'big5', <a name="l00415"></a>00415 'vietnamese' => '', <a name="l00416"></a>00416 'unicode' => 'utf-8', <a name="l00417"></a>00417 ); <a name="l00418"></a>00418 <a name="l00419"></a>00419 <span class="comment">// mapping of language (family) names to charsets on Windows</span> <a name="l00420"></a><a class="code" href="classt3lib__cs.html#a08d813e271dd1e3546cc2632d229aff">00420</a> var <a class="code" href="classt3lib__cs.html#a08d813e271dd1e3546cc2632d229aff">$script_to_charset_windows</a>=array( <a name="l00421"></a>00421 'east_european' => 'windows-1250', <a name="l00422"></a>00422 'cyrillic' => 'windows-1251', <a name="l00423"></a>00423 'west_european' => 'windows-1252', <a name="l00424"></a>00424 'greek' => 'windows-1253', <a name="l00425"></a>00425 'turkish' => 'windows-1254', <a name="l00426"></a>00426 'hebrew' => 'windows-1255', <a name="l00427"></a>00427 'arabic' => 'windows-1256', <a name="l00428"></a>00428 'baltic' => 'windows-1257', <a name="l00429"></a>00429 'estonian' => 'windows-1257', <a name="l00430"></a>00430 'lithuanian' => 'windows-1257', <a name="l00431"></a>00431 'vietnamese' => 'windows-1258', <a name="l00432"></a>00432 'thai' => 'cp874', <a name="l00433"></a>00433 'korean' => 'cp949', <a name="l00434"></a>00434 'chinese' => 'gb2312', <a name="l00435"></a>00435 'japanese' => 'shift_jis', <a name="l00436"></a>00436 'simpl_chinese' => 'gb2312', <a name="l00437"></a>00437 'trad_chinese' => 'big5', <a name="l00438"></a>00438 ); <a name="l00439"></a>00439 <a name="l00440"></a>00440 <span class="comment">// mapping of locale names to charsets</span> <a name="l00441"></a><a class="code" href="classt3lib__cs.html#144b906c09da3dd2e969405778b4e6c4">00441</a> var <a class="code" href="classt3lib__cs.html#144b906c09da3dd2e969405778b4e6c4">$locale_to_charset</a>=array( <a name="l00442"></a>00442 'japanese.euc' => 'euc-jp', <a name="l00443"></a>00443 'ja_jp.ujis' => 'euc-jp', <a 
name="l00444"></a>00444 'korean.euc' => 'euc-kr', <a name="l00445"></a>00445 'sr@Latn' => 'iso-8859-2', <a name="l00446"></a>00446 'zh_cn' => 'gb2312', <a name="l00447"></a>00447 'zh_hk' => 'big5', <a name="l00448"></a>00448 'zh_tw' => 'big5', <a name="l00449"></a>00449 ); <a name="l00450"></a>00450 <a name="l00451"></a>00451 <span class="comment">// TYPO3 specific: Array with the system charsets used for each system language in TYPO3:</span> <a name="l00452"></a>00452 <span class="comment">// Empty values means "iso-8859-1"</span> <a name="l00453"></a><a class="code" href="classt3lib__cs.html#26a016f1c5ea7588cc345351d653e165">00453</a> var <a class="code" href="classt3lib__cs.html#26a016f1c5ea7588cc345351d653e165">$charSetArray</a> = array( <a name="l00454"></a>00454 'dk' => '', <a name="l00455"></a>00455 'de' => '', <a name="l00456"></a>00456 'no' => '', <a name="l00457"></a>00457 'it' => '', <a name="l00458"></a>00458 'fr' => '', <a name="l00459"></a>00459 'es' => '', <a name="l00460"></a>00460 'nl' => '', <a name="l00461"></a>00461 'cz' => 'windows-1250', <a name="l00462"></a>00462 'pl' => 'iso-8859-2', <a name="l00463"></a>00463 'si' => 'windows-1250', <a name="l00464"></a>00464 'fi' => '', <a name="l00465"></a>00465 'tr' => 'iso-8859-9', <a name="l00466"></a>00466 'se' => '', <a name="l00467"></a>00467 'pt' => '', <a name="l00468"></a>00468 'ru' => 'windows-1251', <a name="l00469"></a>00469 'ro' => 'iso-8859-2', <a name="l00470"></a>00470 'ch' => 'gb2312', <a name="l00471"></a>00471 'sk' => 'windows-1250', <a name="l00472"></a>00472 'lt' => 'windows-1257', <a name="l00473"></a>00473 'is' => 'utf-8', <a name="l00474"></a>00474 'hr' => 'windows-1250', <a name="l00475"></a>00475 'hu' => 'iso-8859-2', <a name="l00476"></a>00476 'gl' => '', <a name="l00477"></a>00477 'th' => 'iso-8859-11', <a name="l00478"></a>00478 'gr' => 'iso-8859-7', <a name="l00479"></a>00479 'hk' => 'big5', <a name="l00480"></a>00480 'eu' => '', <a name="l00481"></a>00481 'bg' => 
'windows-1251', <a name="l00482"></a>00482 'br' => '', <a name="l00483"></a>00483 'et' => 'iso-8859-4', <a name="l00484"></a>00484 'ar' => 'iso-8859-6', <a name="l00485"></a>00485 'he' => 'utf-8', <a name="l00486"></a>00486 'ua' => 'windows-1251', <a name="l00487"></a>00487 'jp' => 'shift_jis', <a name="l00488"></a>00488 'lv' => 'utf-8', <a name="l00489"></a>00489 'vn' => 'utf-8', <a name="l00490"></a>00490 'ca' => 'iso-8859-15', <a name="l00491"></a>00491 'ba' => 'iso-8859-2', <a name="l00492"></a>00492 'kr' => 'euc-kr', <a name="l00493"></a>00493 'eo' => 'utf-8', <a name="l00494"></a>00494 'my' => '', <a name="l00495"></a>00495 'hi' => 'utf-8', <a name="l00496"></a>00496 'fo' => 'utf-8', <a name="l00497"></a>00497 'fa' => 'utf-8', <a name="l00498"></a>00498 'sr' => 'utf-8' <a name="l00499"></a>00499 ); <a name="l00500"></a>00500 <a name="l00501"></a>00501 <span class="comment">// TYPO3 specific: Array with the iso names used for each system language in TYPO3:</span> <a name="l00502"></a>00502 <span class="comment">// Missing keys means: same as Typo3</span> <a name="l00503"></a><a class="code" href="classt3lib__cs.html#60f145ab5597088de7edbf7ce1ae936c">00503</a> var <a class="code" href="classt3lib__cs.html#60f145ab5597088de7edbf7ce1ae936c">$isoArray</a> = array( <a name="l00504"></a>00504 'ba' => 'bs', <a name="l00505"></a>00505 'br' => 'pt_BR', <a name="l00506"></a>00506 'ch' => 'zh_CN', <a name="l00507"></a>00507 'cz' => 'cs', <a name="l00508"></a>00508 'dk' => 'da', <a name="l00509"></a>00509 'si' => 'sl', <a name="l00510"></a>00510 'se' => 'sv', <a name="l00511"></a>00511 'gl' => 'kl', <a name="l00512"></a>00512 'gr' => 'el', <a name="l00513"></a>00513 'hk' => 'zh_HK', <a name="l00514"></a>00514 'kr' => 'ko', <a name="l00515"></a>00515 'ua' => 'uk', <a name="l00516"></a>00516 'jp' => 'ja', <a name="l00517"></a>00517 'vn' => 'vi', <a name="l00518"></a>00518 ); <a name="l00519"></a>00519 <a name="l00527"></a><a class="code" 
href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">00527</a> function <a class="code" href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">parse_charset</a>($charset) { <a name="l00528"></a>00528 $charset = trim(strtolower($charset)); <a name="l00529"></a>00529 <span class="keywordflow">if</span> (isset($this->synonyms[$charset])) $charset = $this->synonyms[$charset]; <a name="l00530"></a>00530 <a name="l00531"></a>00531 <span class="keywordflow">return</span> $charset; <a name="l00532"></a>00532 } <a name="l00533"></a>00533 <a name="l00546"></a><a class="code" href="classt3lib__cs.html#50c8e620cc1aee632e29fe8b73ac7613">00546</a> function <a class="code" href="classt3lib__cs.html#50c8e620cc1aee632e29fe8b73ac7613">get_locale_charset</a>($locale) { <a name="l00547"></a>00547 $locale = strtolower($locale); <a name="l00548"></a>00548 <a name="l00549"></a>00549 <span class="comment">// exact locale specific charset?</span> <a name="l00550"></a>00550 <span class="keywordflow">if</span> (isset($this->locale_to_charset[$locale])) <span class="keywordflow">return</span> $this->locale_to_charset[$locale]; <a name="l00551"></a>00551 <a name="l00552"></a>00552 <span class="comment">// get modifier</span> <a name="l00553"></a>00553 list($locale,$modifier) = explode(<span class="charliteral">'@'</span>,$locale); <a name="l00554"></a>00554 <a name="l00555"></a>00555 <span class="comment">// locale contains charset: use it</span> <a name="l00556"></a>00556 list($locale,$charset) = explode(<span class="charliteral">'.'</span>,$locale); <a name="l00557"></a>00557 <span class="keywordflow">if</span> ($charset) <span class="keywordflow">return</span> $this-><a class="code" href="classt3lib__cs.html#70173851d5afc216610e76d69d78935d">parse_charset</a>($charset); <a name="l00558"></a>00558 <a name="l00559"></a>00559 <span class="comment">// modifier is 'euro' (after charset check, because of xx.utf-8@euro)</span> <a name="l00560"></a>00560 <span 
class="keywordflow">if</span> ($modifier == 'euro') <span class="keywordflow">return</span> 'iso-8859-15'; <a name="l00561"></a>00561 <a name="l00562"></a>00562 <span class="comment">// get language</span> <a name="l00563"></a>00563 list($language,$country) = explode(<span class="charliteral">'_'</span>,$locale); <a name="l00564"></a>00564 <span class="keywordflow">if</span> (isset($this->lang_to_script[$language])) $script = $this->lang_to_script[$language]; <a name="l00565"></a>00565 <a name="l00566"></a>00566 <span class="keywordflow">if</span> (TYPO3_OS == 'WIN') { <a name="l00567"></a>00567 $cs = $this->script_to_charset_windows[$script] ? $this->script_to_charset_windows[$script] : 'window-1252'; <a name="l00568"></a>00568 } <span class="keywordflow">else</span> { <a name="l00569"></a>00569 $cs = $this->script_to_charset_unix[$script] ? $this->script_to_charset_unix[$script] : 'iso-8859-1'; <a name="l00570"></a>00570 } <a name="l00571"></a>00571 <a name="l00572"></a>00572 <span class="keywordflow">return</span> $cs; <a name="l00573"></a>00573 } <a name="l00574"></a>00574 <a name="l00575"></a>00575 <a name="l00576"></a>00576 <a name="l00577"></a>00577 <a name="l00578"></a>00578 <a name="l00579"></a>00579 <a name="l00580"></a>00580 <a name="l00581"></a>00581 <a name="l00582"></a>00582 <a name="l00583"></a>00583 <span class="comment">/********************************************</span> <a name="l00584"></a>00584 <span class="comment"> *</span> <a name="l00585"></a>00585 <span class="comment"> * Charset Conversion functions</span> <a name="l00586"></a>00586 <span class="comment"> *</span> <a name="l00587"></a>00587 <span class="comment"> ********************************************/</span> <a name="l00588"></a>00588 <a name="l00599"></a><a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">00599</a> function <a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">conv</a>($str,$fromCS,$toCS,$useEntityForNoChar=0) { <a 
name="l00600"></a>00600 <span class="keywordflow">if</span> ($fromCS==$toCS) <span class="keywordflow">return</span> $str; <a name="l00601"></a>00601 <a name="l00602"></a>00602 <span class="comment">// PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything</span> <a name="l00603"></a>00603 <span class="keywordflow">if</span> ($toCS=='utf-8' || !$useEntityForNoChar) { <a name="l00604"></a>00604 <span class="keywordflow">switch</span>($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']) { <a name="l00605"></a>00605 <span class="keywordflow">case</span> 'mbstring': <a name="l00606"></a>00606 $conv_str = mb_convert_encoding($str,$toCS,$fromCS); <a name="l00607"></a>00607 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str) <span class="keywordflow">return</span> $conv_str; <span class="comment">// returns false for unsupported charsets</span> <a name="l00608"></a>00608 <span class="keywordflow">break</span>; <a name="l00609"></a>00609 <a name="l00610"></a>00610 <span class="keywordflow">case</span> 'iconv': <a name="l00611"></a>00611 $conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str); <a name="l00612"></a>00612 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str) <span class="keywordflow">return</span> $conv_str; <a name="l00613"></a>00613 <span class="keywordflow">break</span>; <a name="l00614"></a>00614 <a name="l00615"></a>00615 <span class="keywordflow">case</span> 'recode': <a name="l00616"></a>00616 $conv_str = recode_string($fromCS.'..'.$toCS,$str); <a name="l00617"></a>00617 <span class="keywordflow">if</span> (<span class="keyword">false</span> !== $conv_str) <span class="keywordflow">return</span> $conv_str; <a name="l00618"></a>00618 <span class="keywordflow">break</span>; <a name="l00619"></a>00619 } <a name="l00620"></a>00620 <span class="comment">// fallback to TYPO3 conversion</span> <a name="l00621"></a>00621 } <a 
name="l00622"></a>00622 <a name="l00623"></a>00623 <span class="keywordflow">if</span> ($fromCS!='utf-8') $str=$this-><a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">utf8_encode</a>($str,$fromCS); <a name="l00624"></a>00624 <span class="keywordflow">if</span> ($toCS!='utf-8') $str=$this-><a class="code" href="classt3lib__cs.html#dcc576daa00767dc9b298ddd7cfac1ba">utf8_decode</a>($str,$toCS,$useEntityForNoChar); <a name="l00625"></a>00625 <span class="keywordflow">return</span> $str; <a name="l00626"></a>00626 } <a name="l00627"></a>00627 <a name="l00639"></a><a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">00639</a> function <a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">convArray</a>(&$array,$fromCS,$toCS,$useEntityForNoChar=0) { <a name="l00640"></a>00640 foreach($array as $key => $value) { <a name="l00641"></a>00641 <span class="keywordflow">if</span> (is_array($array[$key])) { <a name="l00642"></a>00642 $this-><a class="code" href="classt3lib__cs.html#2106800313be12415652490b92204f79">convArray</a>($array[$key],$fromCS,$toCS,$useEntityForNoChar); <a name="l00643"></a>00643 } <span class="keywordflow">else</span> { <a name="l00644"></a>00644 $array[$key] = $this-><a class="code" href="classt3lib__cs.html#c0c3949ae6738f5553fd813fa4b8d047">conv</a>($array[$key],$fromCS,$toCS,$useEntityForNoChar); <a name="l00645"></a>00645 } <a name="l00646"></a>00646 } <a name="l00647"></a>00647 } <a name="l00648"></a>00648 <a name="l00656"></a><a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">00656</a> function <a class="code" href="classt3lib__cs.html#11eaa1ad3c2b2e572282d9aab3438b0d">utf8_encode</a>($str,$charset) { <a name="l00657"></a>00657 <a name="l00658"></a>00658 <span class="keywordflow">if</span> ($charset === 'utf-8') <span class="keywordflow">return</span> $str; <a name="l00659"></a>00659 <a name="l00660"></a>00660 <span class="comment">// Charset is 
case-insensitive.</span> <a name="l00661"></a>00661 <span class="keywordflow">if</span> ($this-><a class="code" href="classt3lib__cs.html#630dd257265f8f9f0955d21cafee1e56">initCharset</a>($charset)) { <span class="comment">// Parse conv. table if not already...</span> <a name="l00662"></a>00662 $strLen = <a class="code" href="classt3lib__cs.html#ba3dbbe621b02266e154ea0dfa15247a">strlen</a>($str); <a name="l00663"></a>00663 $outStr=''; <a name="l00664"></a>00664 <a name="l00665"></a>00665 <span class="keywordflow">for</span> ($a=0;$a<$strLen;$a++) { <span class="comment">// Traverse each char in string.</span> <a name="l00666"></a>00666 $chr=<a class="code" href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">substr</a>($str,$a,1); <a name="l00667"></a>00667 $ord=ord($chr); <a name="l00668"></a>00668 <span class="keywordflow">if</span> (isset($this->twoByteSets[$charset])) { <span class="comment">// If the charset has two bytes per char</span> <a name="l00669"></a>00669 $ord2 = ord($str{$a+1}); <a name="l00670"></a>00670 $ord = $ord<<8 | $ord2; <span class="comment">// assume big endian</span> <a name="l00671"></a>00671 <a name="l00672"></a>00672 <span class="keywordflow">if</span> (isset($this->parsedCharsets[$charset]['local'][$ord])) { <span class="comment">// If the local char-number was found in parsed conv. 
table then we use that, otherwise 127 (no char?)</span> <a name="l00673"></a>00673 $outStr.=$this->parsedCharsets[$charset]['local'][$ord]; <a name="l00674"></a>00674 } <span class="keywordflow">else</span> $outStr.=chr($this->noCharByteVal); <span class="comment">// No char exists</span> <a name="l00675"></a>00675 $a++; <a name="l00676"></a>00676 } elseif ($ord>127) { <span class="comment">// If char has value over 127 it's a multibyte char in UTF-8</span> <a name="l00677"></a>00677 <span class="keywordflow">if</span> (isset($this->eucBasedSets[$charset])) { <span class="comment">// EUC uses two-bytes above 127; we get both and advance pointer and make $ord a 16bit int.</span> <a name="l00678"></a>00678 <span class="keywordflow">if</span> ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) { <span class="comment">// Shift-JIS: chars between 160 and 223 are single byte</span> <a name="l00679"></a>00679 $a++; <a name="l00680"></a>00680 $ord2=ord(<a class="code" href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">substr</a>($str,$a,1)); <a name="l00681"></a>00681 $ord = $ord*256+$ord2; <a name="l00682"></a>00682 } <a name="l00683"></a>00683 } <a name="l00684"></a>00684 <a name="l00685"></a>00685 <span class="keywordflow">if</span> (isset($this->parsedCharsets[$charset]['local'][$ord])) { <span class="comment">// If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?)</span> <a name="l00686"></a>00686 $outStr.= $this->parsedCharsets[$charset]['local'][$ord]; <a name="l00687"></a>00687 } <span class="keywordflow">else</span> $outStr.= chr($this->noCharByteVal); <span class="comment">// No char exists</span> <a name="l00688"></a>00688 } <span class="keywordflow">else</span> $outStr.= $chr; <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. 
Transparent</span> <a name="l00689"></a>00689 } <a name="l00690"></a>00690 <span class="keywordflow">return</span> $outStr; <a name="l00691"></a>00691 } <a name="l00692"></a>00692 } <a name="l00693"></a>00693 <a name="l00702"></a><a class="code" href="classt3lib__cs.html#dcc576daa00767dc9b298ddd7cfac1ba">00702</a> function utf8_decode($str,$charset,$useEntityForNoChar=0) { <a name="l00703"></a>00703 <a name="l00704"></a>00704 <span class="comment">// Charset is case-insensitive.</span> <a name="l00705"></a>00705 <span class="keywordflow">if</span> ($this->initCharset($charset)) { <span class="comment">// Parse conv. table if not already...</span> <a name="l00706"></a>00706 $strLen = strlen($str); <a name="l00707"></a>00707 $outStr=''; <a name="l00708"></a>00708 $buf=''; <a name="l00709"></a>00709 <span class="keywordflow">for</span> ($a=0,$i=0;$a<$strLen;$a++,$i++) { <span class="comment">// Traverse each char in UTF-8 string.</span> <a name="l00710"></a>00710 $chr=substr($str,$a,1); <a name="l00711"></a>00711 $ord=ord($chr); <a name="l00712"></a>00712 <span class="keywordflow">if</span> ($ord>127) { <span class="comment">// This means multibyte! (first byte!)</span> <a name="l00713"></a>00713 <span class="keywordflow">if</span> ($ord & 64) { <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span> <a name="l00714"></a>00714 <a name="l00715"></a>00715 $buf=$chr; <span class="comment">// Add first byte</span> <a name="l00716"></a>00716 <span class="keywordflow">for</span> ($b=0;$b<8;$b++) { <span class="comment">// for each byte in multibyte string...</span> <a name="l00717"></a>00717 $ord = $ord << 1; <span class="comment">// Shift it left and ...</span> <a name="l00718"></a>00718 <span class="keywordflow">if</span> ($ord & 128) { <span class="comment">// ... 
and with 8th bit - if that is set, then there are still bytes in sequence.</span> <a name="l00719"></a>00719 $a++; <span class="comment">// Increase pointer...</span> <a name="l00720"></a>00720 $buf.=substr($str,$a,1); <span class="comment">// ... and add the next char.</span> <a name="l00721"></a>00721 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>; <a name="l00722"></a>00722 } <a name="l00723"></a>00723 <a name="l00724"></a>00724 <span class="keywordflow">if</span> (isset($this->parsedCharsets[$charset]['utf8'][$buf])) { <span class="comment">// If the UTF-8 char-sequence is found then...</span> <a name="l00725"></a>00725 $mByte = $this->parsedCharsets[$charset]['utf8'][$buf]; <span class="comment">// The local number</span> <a name="l00726"></a>00726 <span class="keywordflow">if</span> ($mByte>255) { <span class="comment">// If the local number is greater than 255 we will need to split the byte (16bit word assumed) in two chars.</span> <a name="l00727"></a>00727 $outStr.= chr(($mByte >> 8) & 255).chr($mByte & 255); <a name="l00728"></a>00728 } <span class="keywordflow">else</span> $outStr.= chr($mByte); <a name="l00729"></a>00729 } elseif ($useEntityForNoChar) { <span class="comment">// Create num entity:</span> <a name="l00730"></a>00730 $outStr.='&#'.$this->utf8CharToUnumber($buf,1).<span class="charliteral">';'</span>; <a name="l00731"></a>00731 } <span class="keywordflow">else</span> $outStr.=chr($this->noCharByteVal); <span class="comment">// No char exists</span> <a name="l00732"></a>00732 } <span class="keywordflow">else</span> $outStr.=chr($this->noCharByteVal); <span class="comment">// No char exists (MIDDLE of MB sequence!)</span> <a name="l00733"></a>00733 } <span class="keywordflow">else</span> $outStr.=$chr; <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. 
Transparent</span> <a name="l00734"></a>00734 } <a name="l00735"></a>00735 <span class="keywordflow">return</span> $outStr; <a name="l00736"></a>00736 } <a name="l00737"></a>00737 } <a name="l00738"></a>00738 <a name="l00745"></a><a class="code" href="classt3lib__cs.html#4e8ebb826f8a7b5081e86556c9166a27">00745</a> function utf8_to_entities($str) { <a name="l00746"></a>00746 $strLen = strlen($str); <a name="l00747"></a>00747 $outStr=''; <a name="l00748"></a>00748 $buf=''; <a name="l00749"></a>00749 <span class="keywordflow">for</span> ($a=0;$a<$strLen;$a++) { <span class="comment">// Traverse each char in UTF-8 string.</span> <a name="l00750"></a>00750 $chr=substr($str,$a,1); <a name="l00751"></a>00751 $ord=ord($chr); <a name="l00752"></a>00752 <span class="keywordflow">if</span> ($ord>127) { <span class="comment">// This means multibyte! (first byte!)</span> <a name="l00753"></a>00753 <span class="keywordflow">if</span> ($ord & 64) { <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span> <a name="l00754"></a>00754 $buf=$chr; <span class="comment">// Add first byte</span> <a name="l00755"></a>00755 <span class="keywordflow">for</span> ($b=0;$b<8;$b++) { <span class="comment">// for each byte in multibyte string...</span> <a name="l00756"></a>00756 $ord = $ord << 1; <span class="comment">// Shift it left and ...</span> <a name="l00757"></a>00757 <span class="keywordflow">if</span> ($ord & 128) { <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span> <a name="l00758"></a>00758 $a++; <span class="comment">// Increase pointer...</span> <a name="l00759"></a>00759 $buf.=substr($str,$a,1); <span class="comment">// ... 
and add the next char.</span> <a name="l00760"></a>00760 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>; <a name="l00761"></a>00761 } <a name="l00762"></a>00762 <a name="l00763"></a>00763 $outStr.='&#'.$this->utf8CharToUnumber($buf,1).<span class="charliteral">';'</span>; <a name="l00764"></a>00764 } <span class="keywordflow">else</span> $outStr.=chr($this->noCharByteVal); <span class="comment">// No char exists (MIDDLE of MB sequence!)</span> <a name="l00765"></a>00765 } <span class="keywordflow">else</span> $outStr.=$chr; <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. Transparent</span> <a name="l00766"></a>00766 } <a name="l00767"></a>00767 <a name="l00768"></a>00768 <span class="keywordflow">return</span> $outStr; <a name="l00769"></a>00769 } <a name="l00770"></a>00770 <a name="l00778"></a><a class="code" href="classt3lib__cs.html#9030ca4a9d778d7fe214e6391e26634c">00778</a> function entities_to_utf8($str,$alsoStdHtmlEnt=0) { <a name="l00779"></a>00779 <span class="keywordflow">if</span> ($alsoStdHtmlEnt) { <a name="l00780"></a>00780 $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES)); <span class="comment">// Getting them in iso-8859-1 - but that's ok since this is observed below.</span> <a name="l00781"></a>00781 } <a name="l00782"></a>00782 <a name="l00783"></a>00783 $token = md5(microtime()); <a name="l00784"></a>00784 $parts = explode($token,ereg_replace('(&([#[:alnum:]]*);)',$token.<span class="charliteral">'\2'</span>.$token,$str)); <a name="l00785"></a>00785 foreach($parts as $k => $v) { <a name="l00786"></a>00786 <span class="keywordflow">if</span> ($k%2) { <a name="l00787"></a>00787 <span class="keywordflow">if</span> (substr($v,0,1)==<span class="charliteral">'#'</span>) { <span class="comment">// Dec or hex entities:</span> <a name="l00788"></a>00788 <span class="keywordflow">if</span> (substr($v,1,1)==<span class="charliteral">'x'</span>) { <a name="l00789"></a>00789 
$parts[$k] = $this->UnumberToChar(hexdec(substr($v,2))); <a name="l00790"></a>00790 } <span class="keywordflow">else</span> { <a name="l00791"></a>00791 $parts[$k] = $this->UnumberToChar(substr($v,1)); <a name="l00792"></a>00792 } <a name="l00793"></a>00793 } elseif ($alsoStdHtmlEnt && $trans_tbl[<span class="charliteral">'&'</span>.$v.<span class="charliteral">';'</span>]) { <span class="comment">// Other entities:</span> <a name="l00794"></a>00794 $parts[$k] = $this->utf8_encode($trans_tbl[<span class="charliteral">'&'</span>.$v.<span class="charliteral">';'</span>],'iso-8859-1'); <a name="l00795"></a>00795 } <span class="keywordflow">else</span> { <span class="comment">// No conversion:</span> <a name="l00796"></a>00796 $parts[$k] =<span class="charliteral">'&'</span>.$v.<span class="charliteral">';'</span>; <a name="l00797"></a>00797 } <a name="l00798"></a>00798 } <a name="l00799"></a>00799 } <a name="l00800"></a>00800 <a name="l00801"></a>00801 <span class="keywordflow">return</span> implode('',$parts); <a name="l00802"></a>00802 } <a name="l00803"></a>00803 <a name="l00812"></a><a class="code" href="classt3lib__cs.html#c8d3e880bb47b5211a1befc1b93eeef0">00812</a> function utf8_to_numberarray($str,$convEntities=0,$retChar=0) { <a name="l00813"></a>00813 <span class="comment">// If entities must be registered as well...:</span> <a name="l00814"></a>00814 <span class="keywordflow">if</span> ($convEntities) { <a name="l00815"></a>00815 $str = $this->entities_to_utf8($str,1); <a name="l00816"></a>00816 } <a name="l00817"></a>00817 <span class="comment">// Do conversion:</span> <a name="l00818"></a>00818 $strLen = strlen($str); <a name="l00819"></a>00819 $outArr=array(); <a name="l00820"></a>00820 $buf=''; <a name="l00821"></a>00821 <span class="keywordflow">for</span> ($a=0;$a<$strLen;$a++) { <span class="comment">// Traverse each char in UTF-8 string.</span> <a name="l00822"></a>00822 $chr=substr($str,$a,1); <a name="l00823"></a>00823 $ord=ord($chr); <a 
name="l00824"></a>00824 <span class="keywordflow">if</span> ($ord>127) { <span class="comment">// This means multibyte! (first byte!)</span> <a name="l00825"></a>00825 <span class="keywordflow">if</span> ($ord & 64) { <span class="comment">// Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence.</span> <a name="l00826"></a>00826 $buf=$chr; <span class="comment">// Add first byte</span> <a name="l00827"></a>00827 <span class="keywordflow">for</span> ($b=0;$b<8;$b++) { <span class="comment">// for each byte in multibyte string...</span> <a name="l00828"></a>00828 $ord = $ord << 1; <span class="comment">// Shift it left and ...</span> <a name="l00829"></a>00829 <span class="keywordflow">if</span> ($ord & 128) { <span class="comment">// ... and with 8th bit - if that is set, then there are still bytes in sequence.</span> <a name="l00830"></a>00830 $a++; <span class="comment">// Increase pointer...</span> <a name="l00831"></a>00831 $buf.=substr($str,$a,1); <span class="comment">// ... and add the next char.</span> <a name="l00832"></a>00832 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>; <a name="l00833"></a>00833 } <a name="l00834"></a>00834 <a name="l00835"></a>00835 $outArr[]=$retChar?$buf:$this->utf8CharToUnumber($buf); <a name="l00836"></a>00836 } <span class="keywordflow">else</span> $outArr[]=$retChar?chr($this->noCharByteVal):$this->noCharByteVal; <span class="comment">// No char exists (MIDDLE of MB sequence!)</span> <a name="l00837"></a>00837 } <span class="keywordflow">else</span> $outArr[]=$retChar?chr($ord):$ord; <span class="comment">// ... otherwise it's just ASCII 0-127 and one byte. 
Transparent</span> <a name="l00838"></a>00838 } <a name="l00839"></a>00839 <a name="l00840"></a>00840 <span class="keywordflow">return</span> $outArr; <a name="l00841"></a>00841 } <a name="l00842"></a>00842 <a name="l00862"></a><a class="code" href="classt3lib__cs.html#1248c2717104716a0e141236a8ade04b">00862</a> function UnumberToChar($cbyte) { <a name="l00863"></a>00863 $str=''; <a name="l00864"></a>00864 <a name="l00865"></a>00865 <span class="keywordflow">if</span> ($cbyte < 0x80) { <a name="l00866"></a>00866 $str.=chr($cbyte); <a name="l00867"></a>00867 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte < 0x800) { <a name="l00868"></a>00868 $str.=chr(0xC0 | ($cbyte >> 6)); <a name="l00869"></a>00869 $str.=chr(0x80 | ($cbyte & 0x3F)); <a name="l00870"></a>00870 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte < 0x10000) { <a name="l00871"></a>00871 $str.=chr(0xE0 | ($cbyte >> 12)); <a name="l00872"></a>00872 $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); <a name="l00873"></a>00873 $str.=chr(0x80 | ($cbyte & 0x3F)); <a name="l00874"></a>00874 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte < 0x200000) { <a name="l00875"></a>00875 $str.=chr(0xF0 | ($cbyte >> 18)); <a name="l00876"></a>00876 $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); <a name="l00877"></a>00877 $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); <a name="l00878"></a>00878 $str.=chr(0x80 | ($cbyte & 0x3F)); <a name="l00879"></a>00879 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte < 0x4000000) { <a name="l00880"></a>00880 $str.=chr(0xF8 | ($cbyte >> 24)); <a name="l00881"></a>00881 $str.=chr(0x80 | (($cbyte >> 18) & 0x3F)); <a name="l00882"></a>00882 $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); <a name="l00883"></a>00883 $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); <a name="l00884"></a>00884 $str.=chr(0x80 | ($cbyte & 0x3F)); <a name="l00885"></a>00885 } <span 
class="keywordflow">else</span> <span class="keywordflow">if</span> ($cbyte < 0x80000000) { <a name="l00886"></a>00886 $str.=chr(0xFC | ($cbyte >> 30)); <a name="l00887"></a>00887 $str.=chr(0x80 | (($cbyte >> 24) & 0x3F)); <a name="l00888"></a>00888 $str.=chr(0x80 | (($cbyte >> 18) & 0x3F)); <a name="l00889"></a>00889 $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); <a name="l00890"></a>00890 $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); <a name="l00891"></a>00891 $str.=chr(0x80 | ($cbyte & 0x3F)); <a name="l00892"></a>00892 } <span class="keywordflow">else</span> { <span class="comment">// Cannot express a 32-bit character in UTF-8</span> <a name="l00893"></a>00893 $str .= chr($this->noCharByteVal); <a name="l00894"></a>00894 } <a name="l00895"></a>00895 <span class="keywordflow">return</span> $str; <a name="l00896"></a>00896 } <a name="l00897"></a>00897 <a name="l00907"></a><a class="code" href="classt3lib__cs.html#9b8139f08c59ea65af04bcddecc7e98e">00907</a> function utf8CharToUnumber($str,$hex=0) { <a name="l00908"></a>00908 $ord=ord(substr($str,0,1)); <span class="comment">// First char</span> <a name="l00909"></a>00909 <a name="l00910"></a>00910 <span class="keywordflow">if</span> (($ord & 192) == 192) { <span class="comment">// This verifies that it IS a multi byte string</span> <a name="l00911"></a>00911 $binBuf=''; <a name="l00912"></a>00912 <span class="keywordflow">for</span> ($b=0;$b<8;$b++) { <span class="comment">// for each byte in multibyte string...</span> <a name="l00913"></a>00913 $ord = $ord << 1; <span class="comment">// Shift it left and ...</span> <a name="l00914"></a>00914 <span class="keywordflow">if</span> ($ord & 128) { <span class="comment">// ... 
and with 8th bit - if that is set, then there are still bytes in sequence.</span> <a name="l00915"></a>00915 $binBuf.=substr('00000000'.decbin(ord(substr($str,$b+1,1))),-6); <a name="l00916"></a>00916 } <span class="keywordflow">else</span> <span class="keywordflow">break</span>; <a name="l00917"></a>00917 } <a name="l00918"></a>00918 $binBuf=substr('00000000'.decbin(ord(substr($str,0,1))),-(6-$b)).$binBuf; <a name="l00919"></a>00919 <a name="l00920"></a>00920 $int = bindec($binBuf); <a name="l00921"></a>00921 } <span class="keywordflow">else</span> $int = $ord; <a name="l00922"></a>00922 <a name="l00923"></a>00923 <span class="keywordflow">return</span> $hex ? <span class="charliteral">'x'</span>.dechex($int) : $int; <a name="l00924"></a>00924 } <a name="l00925"></a>00925 <a name="l00926"></a>00926 <a name="l00927"></a>00927 <a name="l00928"></a>00928 <a name="l00929"></a>00929 <a name="l00930"></a>00930 <a name="l00931"></a>00931 <a name="l00932"></a>00932 <a name="l00933"></a>00933 <a name="l00934"></a>00934 <span class="comment">/********************************************</span> <a name="l00935"></a>00935 <span class="comment"> *</span> <a name="l00936"></a>00936 <span class="comment"> * Init functions</span> <a name="l00937"></a>00937 <span class="comment"> *</span> <a name="l00938"></a>00938 <span class="comment"> ********************************************/</span> <a name="l00939"></a>00939 <a name="l00950"></a><a class="code" href="classt3lib__cs.html#630dd257265f8f9f0955d21cafee1e56">00950</a> function initCharset($charset) { <a name="l00951"></a>00951 <span class="comment">// Only process if the charset is not yet loaded:</span> <a name="l00952"></a>00952 <span class="keywordflow">if</span> (!is_array($this->parsedCharsets[$charset])) { <a name="l00953"></a>00953 <a name="l00954"></a>00954 <span class="comment">// Conversion table filename:</span> <a name="l00955"></a>00955 $charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl'; <a 
name="l00956"></a>00956 <a name="l00957"></a>00957 <span class="comment">// If the conversion table is found:</span> <a name="l00958"></a>00958 <span class="keywordflow">if</span> ($charset && <a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($charsetConvTableFile) && @is_file($charsetConvTableFile)) { <a name="l00959"></a>00959 <span class="comment">// Cache file for charsets:</span> <a name="l00960"></a>00960 <span class="comment">// Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. For other charsets we are talking 11 ms down to zero.</span> <a name="l00961"></a>00961 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/charset_'.$charset.'.tbl'); <a name="l00962"></a>00962 <span class="keywordflow">if</span> ($cacheFile && @is_file($cacheFile)) { <a name="l00963"></a>00963 $this->parsedCharsets[$charset]=unserialize(t3lib_div::getUrl($cacheFile)); <a name="l00964"></a>00964 } <span class="keywordflow">else</span> { <a name="l00965"></a>00965 <span class="comment">// Parse conversion table into lines:</span> <a name="l00966"></a>00966 $lines=<a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(chr(10),t3lib_div::getUrl($charsetConvTableFile),1); <a name="l00967"></a>00967 <span class="comment">// Initialize the internal variable holding the conv. 
table:</span> <a name="l00968"></a>00968 $this->parsedCharsets[$charset]=array('local'=>array(),'utf8'=>array()); <a name="l00969"></a>00969 <span class="comment">// traverse the lines:</span> <a name="l00970"></a>00970 $detectedType=''; <a name="l00971"></a>00971 foreach($lines as $value) { <a name="l00972"></a>00972 <span class="keywordflow">if</span> (trim($value) && substr($value,0,1)!=<span class="charliteral">'#'</span>) { <span class="comment">// Comment line or blanks are ignored.</span> <a name="l00973"></a>00973 <a name="l00974"></a>00974 <span class="comment">// Detect type if not done yet: (Done on first real line)</span> <a name="l00975"></a>00975 <span class="comment">// The "whitespaced" type is on the syntax "0x0A 0x000A #LINE FEED" while "ms-token" is like "B9 = U+00B9 : SUPERSCRIPT ONE"</span> <a name="l00976"></a>00976 <span class="keywordflow">if</span> (!$detectedType) $detectedType = ereg('[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+',$value) ? 
'whitespaced' : 'ms-token'; <a name="l00977"></a>00977 <a name="l00978"></a>00978 <span class="keywordflow">if</span> ($detectedType=='ms-token') { <a name="l00979"></a>00979 list($hexbyte,$utf8) = split('=|:',$value,3); <a name="l00980"></a>00980 } elseif ($detectedType=='whitespaced') { <a name="l00981"></a>00981 $regA=array(); <a name="l00982"></a>00982 ereg('[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+',$value,$regA); <a name="l00983"></a>00983 $hexbyte = $regA[1]; <a name="l00984"></a>00984 $utf8 = 'U+'.$regA[2]; <a name="l00985"></a>00985 } <a name="l00986"></a>00986 $decval = hexdec(trim($hexbyte)); <a name="l00987"></a>00987 <span class="keywordflow">if</span> ($decval>127) { <a name="l00988"></a>00988 $utf8decval = hexdec(substr(trim($utf8),2)); <a name="l00989"></a>00989 $this->parsedCharsets[$charset]['local'][$decval]=$this->UnumberToChar($utf8decval); <a name="l00990"></a>00990 $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]]=$decval; <a name="l00991"></a>00991 } <a name="l00992"></a>00992 } <a name="l00993"></a>00993 } <a name="l00994"></a>00994 <span class="keywordflow">if</span> ($cacheFile) { <a name="l00995"></a>00995 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this->parsedCharsets[$charset])); <a name="l00996"></a>00996 } <a name="l00997"></a>00997 } <a name="l00998"></a>00998 <span class="keywordflow">return</span> 2; <a name="l00999"></a>00999 } <span class="keywordflow">else</span> <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01000"></a>01000 } <span class="keywordflow">else</span> <span class="keywordflow">return</span> 1; <a name="l01001"></a>01001 } <a name="l01002"></a>01002 <a name="l01012"></a><a class="code" href="classt3lib__cs.html#342924e5872abb1227910d97556e5f48">01012</a> function initUnicodeData($mode=null) { <a 
name="l01013"></a>01013 <span class="comment">// cache files</span> <a name="l01014"></a>01014 $cacheFileCase = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/cscase_utf-8.tbl'); <a name="l01015"></a>01015 $cacheFileASCII = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/csascii_utf-8.tbl'); <a name="l01016"></a>01016 <a name="l01017"></a>01017 <span class="comment">// Only process if the tables are not yet loaded</span> <a name="l01018"></a>01018 <span class="keywordflow">switch</span>($mode) { <a name="l01019"></a>01019 <span class="keywordflow">case</span> 'case': <a name="l01020"></a>01020 <span class="keywordflow">if</span> (is_array($this->caseFolding['utf-8'])) <span class="keywordflow">return</span> 1; <a name="l01021"></a>01021 <a name="l01022"></a>01022 <span class="comment">// Use cached version if possible</span> <a name="l01023"></a>01023 <span class="keywordflow">if</span> ($cacheFileCase && @is_file($cacheFileCase)) { <a name="l01024"></a>01024 $this->caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase)); <a name="l01025"></a>01025 <span class="keywordflow">return</span> 2; <a name="l01026"></a>01026 } <a name="l01027"></a>01027 <span class="keywordflow">break</span>; <a name="l01028"></a>01028 <a name="l01029"></a>01029 <span class="keywordflow">case</span> 'ascii': <a name="l01030"></a>01030 <span class="keywordflow">if</span> (is_array($this->toASCII['utf-8'])) <span class="keywordflow">return</span> 1; <a name="l01031"></a>01031 <a name="l01032"></a>01032 <span class="comment">// Use cached version if possible</span> <a name="l01033"></a>01033 <span class="keywordflow">if</span> ($cacheFileASCII && @is_file($cacheFileASCII)) { <a name="l01034"></a>01034 $this->toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII)); <a 
name="l01035"></a>01035 <span class="keywordflow">return</span> 2; <a name="l01036"></a>01036 } <a name="l01037"></a>01037 <span class="keywordflow">break</span>; <a name="l01038"></a>01038 } <a name="l01039"></a>01039 <a name="l01040"></a>01040 <span class="comment">// process main Unicode data file</span> <a name="l01041"></a>01041 $unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt'; <a name="l01042"></a>01042 <span class="keywordflow">if</span> (!(<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($unicodeDataFile) && @is_file($unicodeDataFile))) <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01043"></a>01043 <a name="l01044"></a>01044 $fh = fopen($unicodeDataFile,'rb'); <a name="l01045"></a>01045 <span class="keywordflow">if</span> (!$fh) <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01046"></a>01046 <a name="l01047"></a>01047 <span class="comment">// key = utf8 char (single codepoint), value = utf8 string (codepoint sequence)</span> <a name="l01048"></a>01048 <span class="comment">// note: we use the UTF-8 characters here and not the Unicode numbers to avoid conversion roundtrip in utf8_strtolower/-upper)</span> <a name="l01049"></a>01049 $this->caseFolding['utf-8'] = array(); <a name="l01050"></a>01050 $utf8CaseFolding =& $this->caseFolding['utf-8']; <span class="comment">// a shorthand</span> <a name="l01051"></a>01051 $utf8CaseFolding['toUpper'] = array(); <a name="l01052"></a>01052 $utf8CaseFolding['toLower'] = array(); <a name="l01053"></a>01053 $utf8CaseFolding['toTitle'] = array(); <a name="l01054"></a>01054 <a name="l01055"></a>01055 $decomposition = array(); <span class="comment">// array of temp. decompositions</span> <a name="l01056"></a>01056 $mark = array(); <span class="comment">// array of chars that are marks (eg. 
composing accents)</span> <a name="l01057"></a>01057 $number = array(); <span class="comment">// array of chars that are numbers (eg. digits)</span> <a name="l01058"></a>01058 $omit = array(); <span class="comment">// array of chars to be omitted (eg. Russian hard sign)</span> <a name="l01059"></a>01059 <a name="l01060"></a>01060 <span class="keywordflow">while</span> (!feof($fh)) { <a name="l01061"></a>01061 $line = fgets($fh,4096); <a name="l01062"></a>01062 <span class="comment">// has a lot of info</span> <a name="l01063"></a>01063 list($char,$name,$cat,,,$decomp,,,$num,,,,$upper,$lower,$title,) = split(<span class="charliteral">';'</span>, rtrim($line)); <a name="l01064"></a>01064 <a name="l01065"></a>01065 $ord = hexdec($char); <a name="l01066"></a>01066 <span class="keywordflow">if</span> ($ord > 0xFFFF) <span class="keywordflow">break</span>; <span class="comment">// only process the BMP</span> <a name="l01067"></a>01067 <a name="l01068"></a>01068 $utf8_char = $this->UnumberToChar($ord); <a name="l01069"></a>01069 <a name="l01070"></a>01070 <span class="keywordflow">if</span> ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper)); <a name="l01071"></a>01071 <span class="keywordflow">if</span> ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower)); <a name="l01072"></a>01072 <span class="comment">// store "title" only when different from "upper" (only a few)</span> <a name="l01073"></a>01073 <span class="keywordflow">if</span> ($title && $title != $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title)); <a name="l01074"></a>01074 <a name="l01075"></a>01075 <span class="keywordflow">switch</span> ($cat{0}) { <a name="l01076"></a>01076 <span class="keywordflow">case</span> <span class="charliteral">'M'</span>: <span class="comment">// mark (accent, umlaut, ...)</span> <a name="l01077"></a>01077 $mark[<span class="stringliteral">"U+$char"</span>] = 1; <a 
name="l01078"></a>01078 <span class="keywordflow">break</span>; <a name="l01079"></a>01079 <a name="l01080"></a>01080 <span class="keywordflow">case</span> <span class="charliteral">'N'</span>: <span class="comment">// numeric value</span> <a name="l01081"></a>01081 <span class="keywordflow">if</span> ($ord > 0x80 && $num != '') $number[<span class="stringliteral">"U+$char"</span>] = $num; <a name="l01082"></a>01082 } <a name="l01083"></a>01083 <a name="l01084"></a>01084 <span class="comment">// accented Latin letters without "official" decomposition</span> <a name="l01085"></a>01085 $match = array(); <a name="l01086"></a>01086 <span class="keywordflow">if</span> (ereg('^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH',$name,$match) && !$decomp) { <a name="l01087"></a>01087 $c = ord($match[2]); <a name="l01088"></a>01088 <span class="keywordflow">if</span> ($match[1] == 'SMALL') $c += 32; <a name="l01089"></a>01089 <a name="l01090"></a>01090 $decomposition[<span class="stringliteral">"U+$char"</span>] = array(dechex($c)); <a name="l01091"></a>01091 <span class="keywordflow">continue</span>; <a name="l01092"></a>01092 } <a name="l01093"></a>01093 <a name="l01094"></a>01094 $match = array(); <a name="l01095"></a>01095 <span class="keywordflow">if</span> (ereg('(<.*>)? *(.+)',$decomp,$match)) { <a name="l01096"></a>01096 <span class="keywordflow">switch</span>($match[1]) { <a name="l01097"></a>01097 <span class="keywordflow">case</span> '<circle>': <span class="comment">// add parenthesis as circle replacement, eg (1)</span> <a name="l01098"></a>01098 $match[2] = '0028 '.$match[2].' 0029'; <a name="l01099"></a>01099 <span class="keywordflow">break</span>; <a name="l01100"></a>01100 <a name="l01101"></a>01101 <span class="keywordflow">case</span> '<square>': <span class="comment">// add square brackets as square replacement, eg [1]</span> <a name="l01102"></a>01102 $match[2] = '005B '.$match[2].' 
005D'; <a name="l01103"></a>01103 <span class="keywordflow">break</span>; <a name="l01104"></a>01104 <a name="l01105"></a>01105 <span class="keywordflow">case</span> '<compat>': <span class="comment">// ignore multi char decompositions that start with a space</span> <a name="l01106"></a>01106 <span class="keywordflow">if</span> (ereg('^0020 ',$match[2])) <span class="keywordflow">continue</span> 2; <a name="l01107"></a>01107 <span class="keywordflow">break</span>; <a name="l01108"></a>01108 <a name="l01109"></a>01109 <span class="comment">// ignore Arabic and vertical layout presentation decomposition</span> <a name="l01110"></a>01110 <span class="keywordflow">case</span> '<initial>': <a name="l01111"></a>01111 <span class="keywordflow">case</span> '<medial>': <a name="l01112"></a>01112 <span class="keywordflow">case</span> '<<span class="keyword">final</span>>': <a name="l01113"></a>01113 <span class="keywordflow">case</span> '<isolated>': <a name="l01114"></a>01114 <span class="keywordflow">case</span> '<vertical>': <a name="l01115"></a>01115 <span class="keywordflow">continue</span> 2; <a name="l01116"></a>01116 } <a name="l01117"></a>01117 $decomposition[<span class="stringliteral">"U+$char"</span>] = split(<span class="charliteral">' '</span>,$match[2]); <a name="l01118"></a>01118 } <a name="l01119"></a>01119 } <a name="l01120"></a>01120 fclose($fh); <a name="l01121"></a>01121 <a name="l01122"></a>01122 <span class="comment">// process additional Unicode data for casing (allow folded characters to expand into a sequence)</span> <a name="l01123"></a>01123 $specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt'; <a name="l01124"></a>01124 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($specialCasingFile) && @is_file($specialCasingFile)) { <a name="l01125"></a>01125 $fh = fopen($specialCasingFile,'rb'); <a name="l01126"></a>01126 <span class="keywordflow">if</span> 
($fh) { <a name="l01127"></a>01127 <span class="keywordflow">while</span> (!feof($fh)) { <a name="l01128"></a>01128 $line = fgets($fh,4096); <a name="l01129"></a>01129 <span class="keywordflow">if</span> ($line{0} != <span class="charliteral">'#'</span> && trim($line) != '') { <a name="l01130"></a>01130 <a name="l01131"></a>01131 list($char,$lower,$title,$upper,$cond) = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">';'</span>, $line); <a name="l01132"></a>01132 <span class="keywordflow">if</span> ($cond == '' || $cond{0} == <span class="charliteral">'#'</span>) { <a name="l01133"></a>01133 $utf8_char = $this->UnumberToChar(hexdec($char)); <a name="l01134"></a>01134 <span class="keywordflow">if</span> ($char != $lower) { <a name="l01135"></a>01135 $arr = split(<span class="charliteral">' '</span>,$lower); <a name="l01136"></a>01136 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); <a name="l01137"></a>01137 $utf8CaseFolding['toLower'][$utf8_char] = implode('',$arr); <a name="l01138"></a>01138 } <a name="l01139"></a>01139 <span class="keywordflow">if</span> ($char != $title && $title != $upper) { <a name="l01140"></a>01140 $arr = split(<span class="charliteral">' '</span>,$title); <a name="l01141"></a>01141 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); <a name="l01142"></a>01142 $utf8CaseFolding['toTitle'][$utf8_char] = implode('',$arr); <a name="l01143"></a>01143 } <a name="l01144"></a>01144 <span class="keywordflow">if</span> ($char != $upper) { <a name="l01145"></a>01145 $arr = split(<span class="charliteral">' '</span>,$upper); <a name="l01146"></a>01146 <span class="keywordflow">for</span> ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); <a name="l01147"></a>01147 $utf8CaseFolding['toUpper'][$utf8_char] = 
implode('',$arr); <a name="l01148"></a>01148 } <a name="l01149"></a>01149 } <a name="l01150"></a>01150 } <a name="l01151"></a>01151 } <a name="l01152"></a>01152 fclose($fh); <a name="l01153"></a>01153 } <a name="l01154"></a>01154 } <a name="l01155"></a>01155 <a name="l01156"></a>01156 <span class="comment">// process custom decompositions</span> <a name="l01157"></a>01157 $customTranslitFile = PATH_t3lib.'unidata/Translit.txt'; <a name="l01158"></a>01158 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__div.html#beba21fa59f1c0160c54d4174d19baf4">t3lib_div::validPathStr</a>($customTranslitFile) && @is_file($customTranslitFile)) { <a name="l01159"></a>01159 $fh = fopen($customTranslitFile,'rb'); <a name="l01160"></a>01160 <span class="keywordflow">if</span> ($fh) { <a name="l01161"></a>01161 <span class="keywordflow">while</span> (!feof($fh)) { <a name="l01162"></a>01162 $line = fgets($fh,4096); <a name="l01163"></a>01163 <span class="keywordflow">if</span> ($line{0} != <span class="charliteral">'#'</span> && trim($line) != '') { <a name="l01164"></a>01164 list($char,$translit) = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">';'</span>, $line); <a name="l01165"></a>01165 <span class="keywordflow">if</span> (!$translit) $omit[<span class="stringliteral">"U+$char"</span>] = 1; <a name="l01166"></a>01166 $decomposition[<span class="stringliteral">"U+$char"</span>] = split(<span class="charliteral">' '</span>, $translit); <a name="l01167"></a>01167 <a name="l01168"></a>01168 } <a name="l01169"></a>01169 } <a name="l01170"></a>01170 fclose($fh); <a name="l01171"></a>01171 } <a name="l01172"></a>01172 } <a name="l01173"></a>01173 <a name="l01174"></a>01174 <span class="comment">// decompose and remove marks; inspired by unac (Loic Dachary <loic@senga.org>)</span> <a name="l01175"></a>01175 foreach($decomposition as $from => $to) { <a name="l01176"></a>01176 $code_decomp 
= array(); <a name="l01177"></a>01177 <a name="l01178"></a>01178 <span class="keywordflow">while</span> ($code_value = array_shift($to)) { <a name="l01179"></a>01179 <span class="keywordflow">if</span> (isset($decomposition[<span class="stringliteral">"U+$code_value"</span>])) { <span class="comment">// do recursive decomposition</span> <a name="l01180"></a>01180 foreach(array_reverse($decomposition[<span class="stringliteral">"U+$code_value"</span>]) as $cv) { <a name="l01181"></a>01181 array_unshift($to, $cv); <a name="l01182"></a>01182 } <a name="l01183"></a>01183 } elseif (!isset($mark[<span class="stringliteral">"U+$code_value"</span>])) { <span class="comment">// remove mark</span> <a name="l01184"></a>01184 array_push($code_decomp, $code_value); <a name="l01185"></a>01185 } <a name="l01186"></a>01186 } <a name="l01187"></a>01187 <span class="keywordflow">if</span> (count($code_decomp) || isset($omit[$from])) { <a name="l01188"></a>01188 $decomposition[$from] = $code_decomp; <a name="l01189"></a>01189 } <span class="keywordflow">else</span> { <a name="l01190"></a>01190 unset($decomposition[$from]); <a name="l01191"></a>01191 } <a name="l01192"></a>01192 } <a name="l01193"></a>01193 <a name="l01194"></a>01194 <span class="comment">// create ascii only mapping</span> <a name="l01195"></a>01195 $this->toASCII['utf-8'] = array(); <a name="l01196"></a>01196 $ascii =& $this->toASCII['utf-8']; <a name="l01197"></a>01197 <a name="l01198"></a>01198 foreach($decomposition as $from => $to) { <a name="l01199"></a>01199 $code_decomp = array(); <a name="l01200"></a>01200 <span class="keywordflow">while</span> ($code_value = array_shift($to)) { <a name="l01201"></a>01201 $ord = hexdec($code_value); <a name="l01202"></a>01202 <span class="keywordflow">if</span> ($ord > 127) <a name="l01203"></a>01203 <span class="keywordflow">continue</span> 2; <span class="comment">// skip decompositions containing non-ASCII chars</span> <a name="l01204"></a>01204 <span 
class="keywordflow">else</span> <a name="l01205"></a>01205 array_push($code_decomp,chr($ord)); <a name="l01206"></a>01206 } <a name="l01207"></a>01207 $ascii[$this->UnumberToChar(hexdec($from))] = join('',$code_decomp); <a name="l01208"></a>01208 } <a name="l01209"></a>01209 <a name="l01210"></a>01210 <span class="comment">// add numeric decompositions</span> <a name="l01211"></a>01211 foreach($number as $from => $to) { <a name="l01212"></a>01212 $utf8_char = $this->UnumberToChar(hexdec($from)); <a name="l01213"></a>01213 <span class="keywordflow">if</span> (!isset($ascii[$utf8_char])) { <a name="l01214"></a>01214 $ascii[$utf8_char] = $to; <a name="l01215"></a>01215 } <a name="l01216"></a>01216 } <a name="l01217"></a>01217 <a name="l01218"></a>01218 <span class="keywordflow">if</span> ($cacheFileCase) { <a name="l01219"></a>01219 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFileCase,serialize($utf8CaseFolding)); <a name="l01220"></a>01220 } <a name="l01221"></a>01221 <a name="l01222"></a>01222 <span class="keywordflow">if</span> ($cacheFileASCII) { <a name="l01223"></a>01223 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFileASCII,serialize($ascii)); <a name="l01224"></a>01224 } <a name="l01225"></a>01225 <a name="l01226"></a>01226 <span class="keywordflow">return</span> 3; <a name="l01227"></a>01227 } <a name="l01228"></a>01228 <a name="l01237"></a><a class="code" href="classt3lib__cs.html#68f8c921a75e9928f7726d5438c71405">01237</a> function initCaseFolding($charset) { <a name="l01238"></a>01238 <span class="comment">// Only process if the case table is not yet loaded:</span> <a name="l01239"></a>01239 <span class="keywordflow">if</span> (is_array($this->caseFolding[$charset])) <span class="keywordflow">return</span> 1; <a name="l01240"></a>01240 <a name="l01241"></a>01241 <span class="comment">// Use cached 
version if possible</span> <a name="l01242"></a>01242 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/cscase_'.$charset.'.tbl'); <a name="l01243"></a>01243 <span class="keywordflow">if</span> ($cacheFile && @is_file($cacheFile)) { <a name="l01244"></a>01244 $this->caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile)); <a name="l01245"></a>01245 <span class="keywordflow">return</span> 2; <a name="l01246"></a>01246 } <a name="l01247"></a>01247 <a name="l01248"></a>01248 <span class="comment">// init UTF-8 conversion for this charset</span> <a name="l01249"></a>01249 <span class="keywordflow">if</span> (!$this->initCharset($charset)) { <a name="l01250"></a>01250 <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01251"></a>01251 } <a name="l01252"></a>01252 <a name="l01253"></a>01253 <span class="comment">// UTF-8 case folding is used as the base conversion table</span> <a name="l01254"></a>01254 <span class="keywordflow">if</span> (!$this->initUnicodeData('<span class="keywordflow">case</span>')) { <a name="l01255"></a>01255 <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01256"></a>01256 } <a name="l01257"></a>01257 <a name="l01258"></a>01258 $nochar = chr($this->noCharByteVal); <a name="l01259"></a>01259 foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { <a name="l01260"></a>01260 <span class="comment">// reconvert to charset (don't use chr() of numeric value, might be muli-byte)</span> <a name="l01261"></a>01261 $c = $this->utf8_decode($utf8, $charset); <a name="l01262"></a>01262 <a name="l01263"></a>01263 <span class="comment">// $cc = $this->conv($this->caseFolding['utf-8']['toUpper'][$utf8], 'utf-8', $charset);</span> <a name="l01264"></a>01264 $cc = $this->utf8_decode($this->caseFolding['utf-8']['toUpper'][$utf8], $charset); <a name="l01265"></a>01265 <span 
class="keywordflow">if</span> ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toUpper'][$c] = $cc; <a name="l01266"></a>01266 <a name="l01267"></a>01267 <span class="comment">// $cc = $this->conv($this->caseFolding['utf-8']['toLower'][$utf8], 'utf-8', $charset);</span> <a name="l01268"></a>01268 $cc = $this->utf8_decode($this->caseFolding['utf-8']['toLower'][$utf8], $charset); <a name="l01269"></a>01269 <span class="keywordflow">if</span> ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toLower'][$c] = $cc; <a name="l01270"></a>01270 <a name="l01271"></a>01271 <span class="comment">// $cc = $this->conv($this->caseFolding['utf-8']['toTitle'][$utf8], 'utf-8', $charset);</span> <a name="l01272"></a>01272 $cc = $this->utf8_decode($this->caseFolding['utf-8']['toTitle'][$utf8], $charset); <a name="l01273"></a>01273 <span class="keywordflow">if</span> ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toTitle'][$c] = $cc; <a name="l01274"></a>01274 } <a name="l01275"></a>01275 <a name="l01276"></a>01276 <span class="comment">// add the ASCII case table</span> <a name="l01277"></a>01277 <span class="keywordflow">for</span> ($i=ord(<span class="charliteral">'a'</span>); $i<=ord(<span class="charliteral">'z'</span>); $i++) { <a name="l01278"></a>01278 $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32); <a name="l01279"></a>01279 } <a name="l01280"></a>01280 <span class="keywordflow">for</span> ($i=ord(<span class="charliteral">'A'</span>); $i<=ord(<span class="charliteral">'Z'</span>); $i++) { <a name="l01281"></a>01281 $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i+32); <a name="l01282"></a>01282 } <a name="l01283"></a>01283 <a name="l01284"></a>01284 <span class="keywordflow">if</span> ($cacheFile) { <a name="l01285"></a>01285 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this->caseFolding[$charset])); <a name="l01286"></a>01286 
} <a name="l01287"></a>01287 <a name="l01288"></a>01288 <span class="keywordflow">return</span> 3; <a name="l01289"></a>01289 } <a name="l01290"></a>01290 <a name="l01299"></a><a class="code" href="classt3lib__cs.html#17dabff9f5cb2162b4c1b0b1d9ac066f">01299</a> function initToASCII($charset) { <a name="l01300"></a>01300 <span class="comment">// Only process if the case table is not yet loaded:</span> <a name="l01301"></a>01301 <span class="keywordflow">if</span> (is_array($this->toASCII[$charset])) <span class="keywordflow">return</span> 1; <a name="l01302"></a>01302 <a name="l01303"></a>01303 <span class="comment">// Use cached version if possible</span> <a name="l01304"></a>01304 $cacheFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>('typo3temp/cs/csascii_'.$charset.'.tbl'); <a name="l01305"></a>01305 <span class="keywordflow">if</span> ($cacheFile && @is_file($cacheFile)) { <a name="l01306"></a>01306 $this->toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile)); <a name="l01307"></a>01307 <span class="keywordflow">return</span> 2; <a name="l01308"></a>01308 } <a name="l01309"></a>01309 <a name="l01310"></a>01310 <span class="comment">// init UTF-8 conversion for this charset</span> <a name="l01311"></a>01311 <span class="keywordflow">if</span> (!$this->initCharset($charset)) { <a name="l01312"></a>01312 <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01313"></a>01313 } <a name="l01314"></a>01314 <a name="l01315"></a>01315 <span class="comment">// UTF-8/ASCII transliteration is used as the base conversion table</span> <a name="l01316"></a>01316 <span class="keywordflow">if</span> (!$this->initUnicodeData('ascii')) { <a name="l01317"></a>01317 <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l01318"></a>01318 } <a name="l01319"></a>01319 <a name="l01320"></a>01320 $nochar = chr($this->noCharByteVal); <a 
name="l01321"></a>01321 foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { <a name="l01322"></a>01322 <span class="comment">// reconvert to charset (don't use chr() of numeric value, might be muli-byte)</span> <a name="l01323"></a>01323 $c = $this->utf8_decode($utf8, $charset); <a name="l01324"></a>01324 <a name="l01325"></a>01325 <span class="keywordflow">if</span> (isset($this->toASCII['utf-8'][$utf8])) { <a name="l01326"></a>01326 $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8]; <a name="l01327"></a>01327 } <a name="l01328"></a>01328 } <a name="l01329"></a>01329 <a name="l01330"></a>01330 <span class="keywordflow">if</span> ($cacheFile) { <a name="l01331"></a>01331 <a class="code" href="classt3lib__div.html#7084eaf77d7faf5270e703bdfd8d4bd6">t3lib_div::writeFileToTypo3tempDir</a>($cacheFile,serialize($this->toASCII[$charset])); <a name="l01332"></a>01332 } <a name="l01333"></a>01333 <a name="l01334"></a>01334 <span class="keywordflow">return</span> 3; <a name="l01335"></a>01335 } <a name="l01336"></a>01336 <a name="l01337"></a>01337 <a name="l01338"></a>01338 <a name="l01339"></a>01339 <a name="l01340"></a>01340 <a name="l01341"></a>01341 <a name="l01342"></a>01342 <a name="l01343"></a>01343 <a name="l01344"></a>01344 <a name="l01345"></a>01345 <a name="l01346"></a>01346 <a name="l01347"></a>01347 <a name="l01348"></a>01348 <a name="l01349"></a>01349 <a name="l01350"></a>01350 <a name="l01351"></a>01351 <a name="l01352"></a>01352 <span class="comment">/********************************************</span> <a name="l01353"></a>01353 <span class="comment"> *</span> <a name="l01354"></a>01354 <span class="comment"> * String operation functions</span> <a name="l01355"></a>01355 <span class="comment"> *</span> <a name="l01356"></a>01356 <span class="comment"> ********************************************/</span> <a name="l01357"></a>01357 <a name="l01370"></a><a class="code" 
href="classt3lib__cs.html#68868a1e06c8f028dde56268b09ff92d">01370</a> function substr($charset,$string,$start,$len=null) { <a name="l01371"></a>01371 <span class="keywordflow">if</span> ($len===0) <span class="keywordflow">return</span> ''; <a name="l01372"></a>01372 <a name="l01373"></a>01373 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { <a name="l01374"></a>01374 <span class="comment">// cannot omit $len, when specifying charset</span> <a name="l01375"></a>01375 <span class="keywordflow">if</span> ($len==null) { <a name="l01376"></a>01376 $enc = mb_internal_encoding(); <span class="comment">// save internal encoding</span> <a name="l01377"></a>01377 mb_internal_encoding($charset); <a name="l01378"></a>01378 $str = mb_substr($string,$start); <a name="l01379"></a>01379 mb_internal_encoding($enc); <span class="comment">// restore internal encoding</span> <a name="l01380"></a>01380 <a name="l01381"></a>01381 <span class="keywordflow">return</span> $str; <a name="l01382"></a>01382 } <a name="l01383"></a>01383 <span class="keywordflow">else</span> { <a name="l01384"></a>01384 <span class="keywordflow">return</span> mb_substr($string,$start,$len,$charset); <a name="l01385"></a>01385 } <a name="l01386"></a>01386 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { <a name="l01387"></a>01387 <span class="comment">// cannot omit $len, when specifying charset</span> <a name="l01388"></a>01388 <span class="keywordflow">if</span> ($len==null) { <a name="l01389"></a>01389 $enc = iconv_get_encoding('internal_encoding'); <span class="comment">// save internal encoding</span> <a name="l01390"></a>01390 iconv_set_encoding('internal_encoding',$charset); <a name="l01391"></a>01391 $str = iconv_substr($string,$start); <a name="l01392"></a>01392 iconv_set_encoding('internal_encoding',$enc); <span class="comment">// restore internal encoding</span> <a name="l01393"></a>01393 <a name="l01394"></a>01394 
<span class="keywordflow">return</span> $str; <a name="l01395"></a>01395 } <a name="l01396"></a>01396 <span class="keywordflow">else</span> { <a name="l01397"></a>01397 <span class="keywordflow">return</span> iconv_substr($string,$start,$len,$charset); <a name="l01398"></a>01398 } <a name="l01399"></a>01399 } elseif ($charset == 'utf-8') { <a name="l01400"></a>01400 <span class="keywordflow">return</span> $this->utf8_substr($string,$start,$len); <a name="l01401"></a>01401 } elseif ($this->eucBasedSets[$charset]) { <a name="l01402"></a>01402 <span class="keywordflow">return</span> $this->euc_substr($string,$start,$charset,$len); <a name="l01403"></a>01403 } elseif ($this->twoByteSets[$charset]) { <a name="l01404"></a>01404 <span class="keywordflow">return</span> substr($string,$start*2,$len*2); <a name="l01405"></a>01405 } elseif ($this->fourByteSets[$charset]) { <a name="l01406"></a>01406 <span class="keywordflow">return</span> substr($string,$start*4,$len*4); <a name="l01407"></a>01407 } <a name="l01408"></a>01408 <a name="l01409"></a>01409 <span class="comment">// treat everything else as single-byte encoding</span> <a name="l01410"></a>01410 <span class="keywordflow">return</span> $len === NULL ? 
substr($string,$start) : substr($string,$start,$len); <a name="l01411"></a>01411 } <a name="l01412"></a>01412 <a name="l01423"></a><a class="code" href="classt3lib__cs.html#ba3dbbe621b02266e154ea0dfa15247a">01423</a> function strlen($charset,$string) { <a name="l01424"></a>01424 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { <a name="l01425"></a>01425 <span class="keywordflow">return</span> mb_strlen($string,$charset); <a name="l01426"></a>01426 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { <a name="l01427"></a>01427 <span class="keywordflow">return</span> iconv_strlen($string,$charset); <a name="l01428"></a>01428 } elseif ($charset == 'utf-8') { <a name="l01429"></a>01429 <span class="keywordflow">return</span> $this->utf8_strlen($string); <a name="l01430"></a>01430 } elseif ($this->eucBasedSets[$charset]) { <a name="l01431"></a>01431 <span class="keywordflow">return</span> $this->euc_strlen($string,$charset); <a name="l01432"></a>01432 } elseif ($this->twoByteSets[$charset]) { <a name="l01433"></a>01433 <span class="keywordflow">return</span> strlen($string)/2; <a name="l01434"></a>01434 } elseif ($this->fourByteSets[$charset]) { <a name="l01435"></a>01435 <span class="keywordflow">return</span> strlen($string)/4; <a name="l01436"></a>01436 } <a name="l01437"></a>01437 <span class="comment">// treat everything else as single-byte encoding</span> <a name="l01438"></a>01438 <span class="keywordflow">return</span> strlen($string); <a name="l01439"></a>01439 } <a name="l01440"></a>01440 <a name="l01453"></a><a class="code" href="classt3lib__cs.html#baaa8766ceae14506fa6724e8540c86d">01453</a> function crop($charset,$string,$len,$crop='') { <a name="l01454"></a>01454 <span class="keywordflow">if</span> (intval($len) == 0) <span class="keywordflow">return</span> $string; <a name="l01455"></a>01455 <a name="l01456"></a>01456 <span class="keywordflow">if</span> ($charset == 'utf-8') 
{ <a name="l01457"></a>01457 $i = $this->utf8_char2byte_pos($string,$len); <a name="l01458"></a>01458 } elseif ($this->eucBasedSets[$charset]) { <a name="l01459"></a>01459 $i = $this->euc_char2byte_pos($string,$len,$charset); <a name="l01460"></a>01460 } <span class="keywordflow">else</span> { <a name="l01461"></a>01461 <span class="keywordflow">if</span> ($len > 0) { <a name="l01462"></a>01462 $i = $len; <a name="l01463"></a>01463 } <span class="keywordflow">else</span> { <a name="l01464"></a>01464 $i = strlen($string)+$len; <a name="l01465"></a>01465 <span class="keywordflow">if</span> ($i<=0) $i = <span class="keyword">false</span>; <a name="l01466"></a>01466 } <a name="l01467"></a>01467 } <a name="l01468"></a>01468 <a name="l01469"></a>01469 <span class="keywordflow">if</span> ($i === <span class="keyword">false</span>) { <span class="comment">// $len outside actual string length</span> <a name="l01470"></a>01470 <span class="keywordflow">return</span> $string; <a name="l01471"></a>01471 } <span class="keywordflow">else</span> { <a name="l01472"></a>01472 <span class="keywordflow">if</span> ($len > 0) { <a name="l01473"></a>01473 <span class="keywordflow">if</span> (strlen($string{$i})) { <a name="l01474"></a>01474 <span class="keywordflow">return</span> substr($string,0,$i).$crop; <a name="l01475"></a>01475 <a name="l01476"></a>01476 } <a name="l01477"></a>01477 } <span class="keywordflow">else</span> { <a name="l01478"></a>01478 <span class="keywordflow">if</span> (strlen($string{$i-1})) { <a name="l01479"></a>01479 <span class="keywordflow">return</span> $crop.substr($string,$i); <a name="l01480"></a>01480 } <a name="l01481"></a>01481 } <a name="l01482"></a>01482 <a name="l01483"></a>01483 <span class="comment">/*</span> <a name="l01484"></a>01484 <span class="comment"> if (abs($len)<$this->strlen($charset,$string)) { // Has to use ->strlen() - otherwise multibyte strings ending with a multibyte char will return true here (which is not a catastrophe, 
but...)</span> <a name="l01485"></a>01485 <span class="comment"> if ($len > 0) {</span> <a name="l01486"></a>01486 <span class="comment"> return substr($string,0,$i).$crop;</span> <a name="l01487"></a>01487 <span class="comment"> } else {</span> <a name="l01488"></a>01488 <span class="comment"> return $crop.substr($string,$i);</span> <a name="l01489"></a>01489 <span class="comment"> }</span> <a name="l01490"></a>01490 <span class="comment"> }</span> <a name="l01491"></a>01491 <span class="comment">*/</span> <a name="l01492"></a>01492 } <a name="l01493"></a>01493 <span class="keywordflow">return</span> $string; <a name="l01494"></a>01494 } <a name="l01495"></a>01495 <a name="l01506"></a><a class="code" href="classt3lib__cs.html#356eb8d5573975163a9eeb76fdca38b9">01506</a> function strtrunc($charset,$string,$len) { <a name="l01507"></a>01507 <span class="keywordflow">if</span> ($len <= 0) <span class="keywordflow">return</span> ''; <a name="l01508"></a>01508 <a name="l01509"></a>01509 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { <a name="l01510"></a>01510 <span class="keywordflow">return</span> mb_strcut($string,0,$len,$charset); <a name="l01511"></a>01511 } elseif ($charset == 'utf-8') { <a name="l01512"></a>01512 <span class="keywordflow">return</span> $this->utf8_strtrunc($string,$len); <a name="l01513"></a>01513 } elseif ($this->eucBasedSets[$charset]) { <a name="l01514"></a>01514 <span class="keywordflow">return</span> $this->euc_strtrunc($string,$charset); <a name="l01515"></a>01515 } elseif ($this->twoByteSets[$charset]) { <a name="l01516"></a>01516 <span class="keywordflow">if</span> ($len % 2) $len--; <span class="comment">// don't cut at odd positions</span> <a name="l01517"></a>01517 } elseif ($this->fourByteSets[$charset]) { <a name="l01518"></a>01518 $x = $len % 4; <a name="l01519"></a>01519 $len -= $x; <span class="comment">// realign to position dividable by four</span> <a 
name="l01520"></a>01520 } <a name="l01521"></a>01521 <span class="comment">// treat everything else as single-byte encoding</span> <a name="l01522"></a>01522 <span class="keywordflow">return</span> substr($string,0,$len); <a name="l01523"></a>01523 } <a name="l01524"></a>01524 <a name="l01540"></a><a class="code" href="classt3lib__cs.html#9eb6df13082ebca55b30a039ec9c2a64">01540</a> function conv_case($charset,$string,$case) { <a name="l01541"></a>01541 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring' && (<span class="keywordtype">float</span>)phpversion() >= 4.3) { <a name="l01542"></a>01542 <span class="keywordflow">if</span> ($case == 'toLower') { <a name="l01543"></a>01543 $string = mb_strtolower($string,$charset); <a name="l01544"></a>01544 } <span class="keywordflow">else</span> { <a name="l01545"></a>01545 $string = mb_strtoupper($string,$charset); <a name="l01546"></a>01546 } <a name="l01547"></a>01547 } elseif ($charset == 'utf-8') { <a name="l01548"></a>01548 $string = $this->utf8_char_mapping($string,'<span class="keywordflow">case</span>',$case); <a name="l01549"></a>01549 } elseif (isset($this->eucBasedSets[$charset])) { <a name="l01550"></a>01550 $string = $this->euc_char_mapping($string,$charset,'<span class="keywordflow">case</span>',$case); <a name="l01551"></a>01551 } <span class="keywordflow">else</span> { <a name="l01552"></a>01552 <span class="comment">// treat everything else as single-byte encoding</span> <a name="l01553"></a>01553 $string = $this->sb_char_mapping($string,$charset,'<span class="keywordflow">case</span>',$case); <a name="l01554"></a>01554 } <a name="l01555"></a>01555 <a name="l01556"></a>01556 <span class="keywordflow">return</span> $string; <a name="l01557"></a>01557 } <a name="l01558"></a>01558 <a name="l01566"></a><a class="code" href="classt3lib__cs.html#a3eb390354ec2121ec912583416fefe7">01566</a> function specCharsToASCII($charset,$string) { <a name="l01567"></a>01567 
<span class="keywordflow">if</span> ($charset == 'utf-8') { <a name="l01568"></a>01568 $string = $this->utf8_char_mapping($string,'ascii'); <a name="l01569"></a>01569 } elseif (isset($this->eucBasedSets[$charset])) { <a name="l01570"></a>01570 $string = $this->euc_char_mapping($string,$charset,'ascii'); <a name="l01571"></a>01571 } <span class="keywordflow">else</span> { <a name="l01572"></a>01572 <span class="comment">// treat everything else as single-byte encoding</span> <a name="l01573"></a>01573 $string = $this->sb_char_mapping($string,$charset,'ascii'); <a name="l01574"></a>01574 } <a name="l01575"></a>01575 <a name="l01576"></a>01576 <span class="keywordflow">return</span> $string; <a name="l01577"></a>01577 } <a name="l01578"></a>01578 <a name="l01579"></a>01579 <a name="l01580"></a>01580 <a name="l01581"></a>01581 <a name="l01582"></a>01582 <a name="l01583"></a>01583 <a name="l01584"></a>01584 <a name="l01585"></a>01585 <a name="l01586"></a>01586 <a name="l01587"></a>01587 <a name="l01588"></a>01588 <a name="l01589"></a>01589 <a name="l01590"></a>01590 <span class="comment">/********************************************</span> <a name="l01591"></a>01591 <span class="comment"> *</span> <a name="l01592"></a>01592 <span class="comment"> * Internal string operation functions</span> <a name="l01593"></a>01593 <span class="comment"> *</span> <a name="l01594"></a>01594 <span class="comment"> ********************************************/</span> <a name="l01595"></a>01595 <a name="l01606"></a><a class="code" href="classt3lib__cs.html#67e70dcda5973aef6b9f3740f658561f">01606</a> function sb_char_mapping($str,$charset,$mode,$opt='') { <a name="l01607"></a>01607 <span class="keywordflow">switch</span>($mode) { <a name="l01608"></a>01608 <span class="keywordflow">case</span> '<span class="keywordflow">case</span>': <a name="l01609"></a>01609 <span class="keywordflow">if</span> (!$this->initCaseFolding($charset)) <span class="keywordflow">return</span> $str; <span 
class="comment">// do nothing</span> <a name="l01610"></a>01610 $map =& $this->caseFolding[$charset][$opt]; <a name="l01611"></a>01611 <span class="keywordflow">break</span>; <a name="l01612"></a>01612 <a name="l01613"></a>01613 <span class="keywordflow">case</span> 'ascii': <a name="l01614"></a>01614 <span class="keywordflow">if</span> (!$this->initToASCII($charset)) <span class="keywordflow">return</span> $str; <span class="comment">// do nothing</span> <a name="l01615"></a>01615 $map =& $this->toASCII[$charset]; <a name="l01616"></a>01616 <span class="keywordflow">break</span>; <a name="l01617"></a>01617 <a name="l01618"></a>01618 <span class="keywordflow">default</span>: <a name="l01619"></a>01619 <span class="keywordflow">return</span> $str; <a name="l01620"></a>01620 } <a name="l01621"></a>01621 <a name="l01622"></a>01622 $out = ''; <a name="l01623"></a>01623 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++) { <a name="l01624"></a>01624 $c = $str{$i}; <a name="l01625"></a>01625 <span class="keywordflow">if</span> (isset($map[$c])) { <a name="l01626"></a>01626 $out .= $map[$c]; <a name="l01627"></a>01627 } <span class="keywordflow">else</span> { <a name="l01628"></a>01628 $out .= $c; <a name="l01629"></a>01629 } <a name="l01630"></a>01630 } <a name="l01631"></a>01631 <a name="l01632"></a>01632 <span class="keywordflow">return</span> $out; <a name="l01633"></a>01633 } <a name="l01634"></a>01634 <a name="l01635"></a>01635 <a name="l01636"></a>01636 <a name="l01637"></a>01637 <a name="l01638"></a>01638 <a name="l01639"></a>01639 <a name="l01640"></a>01640 <a name="l01641"></a>01641 <a name="l01642"></a>01642 <a name="l01643"></a>01643 <a name="l01644"></a>01644 <span class="comment">/********************************************</span> <a name="l01645"></a>01645 <span class="comment"> *</span> <a name="l01646"></a>01646 <span class="comment"> * Internal UTF-8 string operation functions</span> <a name="l01647"></a>01647 <span class="comment"> 
*</span> <a name="l01648"></a>01648 <span class="comment"> ********************************************/</span> <a name="l01649"></a>01649 <a name="l01661"></a><a class="code" href="classt3lib__cs.html#3d99778bbc250f606478170343556072">01661</a> function utf8_substr($str,$start,$len=null) { <a name="l01662"></a>01662 <span class="keywordflow">if</span> (!strcmp($len,<span class="charliteral">'0'</span>)) <span class="keywordflow">return</span> ''; <a name="l01663"></a>01663 <a name="l01664"></a>01664 $byte_start = $this->utf8_char2byte_pos($str,$start); <a name="l01665"></a>01665 <span class="keywordflow">if</span> ($byte_start === <span class="keyword">false</span>) { <a name="l01666"></a>01666 <span class="keywordflow">if</span> ($start > 0) { <a name="l01667"></a>01667 <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// $start outside string length</span> <a name="l01668"></a>01668 } <span class="keywordflow">else</span> { <a name="l01669"></a>01669 $start = 0; <a name="l01670"></a>01670 } <a name="l01671"></a>01671 } <a name="l01672"></a>01672 <a name="l01673"></a>01673 $str = substr($str,$byte_start); <a name="l01674"></a>01674 <a name="l01675"></a>01675 <span class="keywordflow">if</span> ($len!=null) { <a name="l01676"></a>01676 $byte_end = $this->utf8_char2byte_pos($str,$len); <a name="l01677"></a>01677 <span class="keywordflow">if</span> ($byte_end === <span class="keyword">false</span>) <span class="comment">// $len outside actual string length</span> <a name="l01678"></a>01678 <span class="keywordflow">return</span> $len<0 ? 
'' : $str; <span class="comment">// When length is less than zero and exceeds, then we return blank string.</span> <a name="l01679"></a>01679 <span class="keywordflow">else</span> <a name="l01680"></a>01680 <span class="keywordflow">return</span> substr($str,0,$byte_end); <a name="l01681"></a>01681 } <a name="l01682"></a>01682 <span class="keywordflow">else</span> <span class="keywordflow">return</span> $str; <a name="l01683"></a>01683 } <a name="l01684"></a>01684 <a name="l01694"></a><a class="code" href="classt3lib__cs.html#7962ae7eebe4d5d3d7664b75b8b30efe">01694</a> function utf8_strlen($str) { <a name="l01695"></a>01695 $n=0; <a name="l01696"></a>01696 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++) { <a name="l01697"></a>01697 $c = ord($str{$i}); <a name="l01698"></a>01698 <span class="keywordflow">if</span> (!($c & 0x80)) <span class="comment">// single-byte (0xxxxxx)</span> <a name="l01699"></a>01699 $n++; <a name="l01700"></a>01700 elseif (($c & 0xC0) == 0xC0) <span class="comment">// multi-byte starting byte (11xxxxxx)</span> <a name="l01701"></a>01701 $n++; <a name="l01702"></a>01702 } <a name="l01703"></a>01703 <span class="keywordflow">return</span> $n; <a name="l01704"></a>01704 } <a name="l01705"></a>01705 <a name="l01715"></a><a class="code" href="classt3lib__cs.html#65720073d9310f352eb0787048c0e033">01715</a> function utf8_strtrunc($str,$len) { <a name="l01716"></a>01716 $i = $len-1; <a name="l01717"></a>01717 <span class="keywordflow">if</span> (ord($str{$i}) & 0x80) { <span class="comment">// part of a multibyte sequence</span> <a name="l01718"></a>01718 <span class="keywordflow">for</span> (; $i>0 && !(ord($str{$i}) & 0x40); $i--) ; <span class="comment">// find the first byte</span> <a name="l01719"></a>01719 <span class="keywordflow">if</span> ($i <= 0) <span class="keywordflow">return</span> ''; <span class="comment">// sanity check</span> <a name="l01720"></a>01720 <span class="keywordflow">for</span> ($bc=0, 
$mbs=ord($str{$i}); $mbs & 0x80; $mbs = $mbs << 1) $bc++; <span class="comment">// calculate number of bytes</span> <a name="l01721"></a>01721 <span class="keywordflow">if</span> ($bc+$i > $len) <span class="keywordflow">return</span> substr($str,0,$i); <a name="l01722"></a>01722 <span class="comment">// fallthru: multibyte char fits into length</span> <a name="l01723"></a>01723 } <a name="l01724"></a>01724 <span class="keywordflow">return</span> substr($str,0,$len); <a name="l01725"></a>01725 } <a name="l01726"></a>01726 <a name="l01737"></a><a class="code" href="classt3lib__cs.html#61b51591560e3742472e705d9b60fc50">01737</a> function utf8_strpos($haystack,$needle,$offset=0) { <a name="l01738"></a>01738 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { <a name="l01739"></a>01739 <span class="keywordflow">return</span> mb_strpos($haystack,$needle,$offset,'utf-8'); <a name="l01740"></a>01740 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { <a name="l01741"></a>01741 <span class="keywordflow">return</span> iconv_strpos($haystack,$needle,$offset,'utf-8'); <a name="l01742"></a>01742 } <a name="l01743"></a>01743 <a name="l01744"></a>01744 $byte_offset = $this->utf8_char2byte_pos($haystack,$offset); <a name="l01745"></a>01745 <span class="keywordflow">if</span> ($byte_offset === <span class="keyword">false</span>) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span> <a name="l01746"></a>01746 <a name="l01747"></a>01747 $byte_pos = strpos($haystack,$needle,$byte_offset); <a name="l01748"></a>01748 <span class="keywordflow">if</span> ($byte_pos === <span class="keyword">false</span>) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// needle not found</span> <a name="l01749"></a>01749 <a name="l01750"></a>01750 <span class="keywordflow">return</span> 
$this->utf8_byte2char_pos($haystack,$byte_pos); <a name="l01751"></a>01751 } <a name="l01752"></a>01752 <a name="l01762"></a><a class="code" href="classt3lib__cs.html#030d8d8b016c54cf229ec7e9b5f0d177">01762</a> function utf8_strrpos($haystack,$needle) { <a name="l01763"></a>01763 <span class="keywordflow">if</span> ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { <a name="l01764"></a>01764 <span class="keywordflow">return</span> mb_strrpos($haystack,$needle,'utf-8'); <a name="l01765"></a>01765 } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { <a name="l01766"></a>01766 <span class="keywordflow">return</span> iconv_strrpos($haystack,$needle,'utf-8'); <a name="l01767"></a>01767 } <a name="l01768"></a>01768 <a name="l01769"></a>01769 $byte_pos = strrpos($haystack,$needle); <a name="l01770"></a>01770 <span class="keywordflow">if</span> ($byte_pos === <span class="keyword">false</span>) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// needle not found</span> <a name="l01771"></a>01771 <a name="l01772"></a>01772 <span class="keywordflow">return</span> $this->utf8_byte2char_pos($haystack,$byte_pos); <a name="l01773"></a>01773 } <a name="l01774"></a>01774 <a name="l01784"></a><a class="code" href="classt3lib__cs.html#9c53f7d3d136ece0860f8ac2b567c173">01784</a> function utf8_char2byte_pos($str,$pos) { <a name="l01785"></a>01785 $n = 0; <span class="comment">// number of characters found</span> <a name="l01786"></a>01786 $p = abs($pos); <span class="comment">// number of characters wanted</span> <a name="l01787"></a>01787 <a name="l01788"></a>01788 <span class="keywordflow">if</span> ($pos >= 0) { <a name="l01789"></a>01789 $i = 0; <a name="l01790"></a>01790 $d = 1; <a name="l01791"></a>01791 } <span class="keywordflow">else</span> { <a name="l01792"></a>01792 $i = strlen($str)-1; <a name="l01793"></a>01793 $d = -1; <a name="l01794"></a>01794 } <a name="l01795"></a>01795 <a 
name="l01796"></a>01796 <span class="keywordflow">for</span>( ; strlen($str{$i}) && $n<$p; $i+=$d) { <a name="l01797"></a>01797 $c = (<span class="keywordtype">int</span>)ord($str{$i}); <a name="l01798"></a>01798 <span class="keywordflow">if</span> (!($c & 0x80)) <span class="comment">// single-byte (0xxxxxx)</span> <a name="l01799"></a>01799 $n++; <a name="l01800"></a>01800 elseif (($c & 0xC0) == 0xC0) <span class="comment">// multi-byte starting byte (11xxxxxx)</span> <a name="l01801"></a>01801 $n++; <a name="l01802"></a>01802 } <a name="l01803"></a>01803 <span class="keywordflow">if</span> (!strlen($str{$i})) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span> <a name="l01804"></a>01804 <a name="l01805"></a>01805 <span class="keywordflow">if</span> ($pos >= 0) { <a name="l01806"></a>01806 <span class="comment">// skip trailing multi-byte data bytes</span> <a name="l01807"></a>01807 <span class="keywordflow">while</span> ((ord($str{$i}) & 0x80) && !(ord($str{$i}) & 0x40)) { $i++; } <a name="l01808"></a>01808 } <span class="keywordflow">else</span> { <a name="l01809"></a>01809 <span class="comment">// correct offset</span> <a name="l01810"></a>01810 $i++; <a name="l01811"></a>01811 } <a name="l01812"></a>01812 <a name="l01813"></a>01813 <span class="keywordflow">return</span> $i; <a name="l01814"></a>01814 } <a name="l01815"></a>01815 <a name="l01825"></a><a class="code" href="classt3lib__cs.html#f4203ca19d10582e50283e97445869a8">01825</a> function utf8_byte2char_pos($str,$pos) { <a name="l01826"></a>01826 $n = 0; <span class="comment">// number of characters</span> <a name="l01827"></a>01827 <span class="keywordflow">for</span>($i=$pos; $i>0; $i--) { <a name="l01828"></a>01828 $c = (<span class="keywordtype">int</span>)ord($str{$i}); <a name="l01829"></a>01829 <span class="keywordflow">if</span> (!($c & 0x80)) <span class="comment">// single-byte (0xxxxxx)</span> <a 
name="l01830"></a>01830 $n++; <a name="l01831"></a>01831 elseif (($c & 0xC0) == 0xC0) <span class="comment">// multi-byte starting byte (11xxxxxx)</span> <a name="l01832"></a>01832 $n++; <a name="l01833"></a>01833 } <a name="l01834"></a>01834 <span class="keywordflow">if</span> (!strlen($str{$i})) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span> <a name="l01835"></a>01835 <a name="l01836"></a>01836 <span class="keywordflow">return</span> $n; <a name="l01837"></a>01837 } <a name="l01838"></a>01838 <a name="l01848"></a><a class="code" href="classt3lib__cs.html#7c55e3bb2f0de48829aa84decb9f3921">01848</a> function utf8_char_mapping($str,$mode,$opt='') { <a name="l01849"></a>01849 <span class="keywordflow">if</span> (!$this->initUnicodeData($mode)) <span class="keywordflow">return</span> $str; <span class="comment">// do nothing</span> <a name="l01850"></a>01850 <a name="l01851"></a>01851 $out = ''; <a name="l01852"></a>01852 <span class="keywordflow">switch</span>($mode) { <a name="l01853"></a>01853 <span class="keywordflow">case</span> '<span class="keywordflow">case</span>': <a name="l01854"></a>01854 $map =& $this->caseFolding['utf-8'][$opt]; <a name="l01855"></a>01855 <span class="keywordflow">break</span>; <a name="l01856"></a>01856 <a name="l01857"></a>01857 <span class="keywordflow">case</span> 'ascii': <a name="l01858"></a>01858 $map =& $this->toASCII['utf-8']; <a name="l01859"></a>01859 <span class="keywordflow">break</span>; <a name="l01860"></a>01860 <a name="l01861"></a>01861 <span class="keywordflow">default</span>: <a name="l01862"></a>01862 <span class="keywordflow">return</span> $str; <a name="l01863"></a>01863 } <a name="l01864"></a>01864 <a name="l01865"></a>01865 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++) { <a name="l01866"></a>01866 $c = ord($str{$i}); <a name="l01867"></a>01867 <span class="keywordflow">if</span> (!($c & 0x80)) <span 
class="comment">// single-byte (0xxxxxx)</span> <a name="l01868"></a>01868 $mbc = $str{$i}; <a name="l01869"></a>01869 elseif (($c & 0xC0) == 0xC0) { <span class="comment">// multi-byte starting byte (11xxxxxx)</span> <a name="l01870"></a>01870 <span class="keywordflow">for</span> ($bc=0; $c & 0x80; $c = $c << 1) { $bc++; } <span class="comment">// calculate number of bytes</span> <a name="l01871"></a>01871 $mbc = substr($str,$i,$bc); <a name="l01872"></a>01872 $i += $bc-1; <a name="l01873"></a>01873 } <a name="l01874"></a>01874 <a name="l01875"></a>01875 <span class="keywordflow">if</span> (isset($map[$mbc])) { <a name="l01876"></a>01876 $out .= $map[$mbc]; <a name="l01877"></a>01877 } <span class="keywordflow">else</span> { <a name="l01878"></a>01878 $out .= $mbc; <a name="l01879"></a>01879 } <a name="l01880"></a>01880 } <a name="l01881"></a>01881 <a name="l01882"></a>01882 <span class="keywordflow">return</span> $out; <a name="l01883"></a>01883 } <a name="l01884"></a>01884 <a name="l01885"></a>01885 <a name="l01886"></a>01886 <a name="l01887"></a>01887 <a name="l01888"></a>01888 <a name="l01889"></a>01889 <a name="l01890"></a>01890 <a name="l01891"></a>01891 <a name="l01892"></a>01892 <a name="l01893"></a>01893 <a name="l01894"></a>01894 <a name="l01895"></a>01895 <a name="l01896"></a>01896 <a name="l01897"></a>01897 <a name="l01898"></a>01898 <a name="l01899"></a>01899 <a name="l01900"></a>01900 <a name="l01901"></a>01901 <a name="l01902"></a>01902 <span class="comment">/********************************************</span> <a name="l01903"></a>01903 <span class="comment"> *</span> <a name="l01904"></a>01904 <span class="comment"> * Internal EUC string operation functions</span> <a name="l01905"></a>01905 <span class="comment"> *</span> <a name="l01906"></a>01906 <span class="comment"> * Extended Unix Code:</span> <a name="l01907"></a>01907 <span class="comment"> * ASCII compatible 7bit single bytes chars</span> <a name="l01908"></a>01908 <span class="comment"> * 
8bit two byte chars</span> <a name="l01909"></a>01909 <span class="comment"> *</span> <a name="l01910"></a>01910 <span class="comment"> * Shift-JIS is treated as a special case.</span> <a name="l01911"></a>01911 <span class="comment"> *</span> <a name="l01912"></a>01912 <span class="comment"> ********************************************/</span> <a name="l01913"></a>01913 <a name="l01924"></a><a class="code" href="classt3lib__cs.html#f3f6679f979e1445585ebb4d1228bd59">01924</a> function euc_strtrunc($str,$len,$charset) { <a name="l01925"></a>01925 $sjis = ($charset == 'shift_jis'); <a name="l01926"></a>01926 <span class="keywordflow">for</span> ($i=0; strlen($str{$i}) && $i<$len; $i++) { <a name="l01927"></a>01927 $c = ord($str{$i}); <a name="l01928"></a>01928 <span class="keywordflow">if</span> ($sjis) { <a name="l01929"></a>01929 <span class="keywordflow">if</span> (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++; <span class="comment">// advance a double-byte char</span> <a name="l01930"></a>01930 } <a name="l01931"></a>01931 <span class="keywordflow">else</span> { <a name="l01932"></a>01932 <span class="keywordflow">if</span> ($c >= 0x80) $i++; <span class="comment">// advance a double-byte char</span> <a name="l01933"></a>01933 } <a name="l01934"></a>01934 } <a name="l01935"></a>01935 <span class="keywordflow">if</span> (!strlen($str{$i})) <span class="keywordflow">return</span> $str; <span class="comment">// string shorter than supplied length</span> <a name="l01936"></a>01936 <a name="l01937"></a>01937 <span class="keywordflow">if</span> ($i>$len) <a name="l01938"></a>01938 <span class="keywordflow">return</span> substr($str,0,$len-1); <span class="comment">// we ended on a first byte</span> <a name="l01939"></a>01939 <span class="keywordflow">else</span> <a name="l01940"></a>01940 <span class="keywordflow">return</span> substr($str,0,$len); <a name="l01941"></a>01941 } <a name="l01942"></a>01942 <a name="l01953"></a><a class="code" 
href="classt3lib__cs.html#df152592ef0a039f8d6d2fee3791cc18">01953</a> function euc_substr($str,$start,$charset,$len=null) { <a name="l01954"></a>01954 $byte_start = $this->euc_char2byte_pos($str,$start,$charset); <a name="l01955"></a>01955 <span class="keywordflow">if</span> ($byte_start === <span class="keyword">false</span>) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// $start outside string length</span> <a name="l01956"></a>01956 <a name="l01957"></a>01957 $str = substr($str,$byte_start); <a name="l01958"></a>01958 <a name="l01959"></a>01959 <span class="keywordflow">if</span> ($len!=null) { <a name="l01960"></a>01960 $byte_end = $this->euc_char2byte_pos($str,$len,$charset); <a name="l01961"></a>01961 <span class="keywordflow">if</span> ($byte_end === <span class="keyword">false</span>) <span class="comment">// $len outside actual string length</span> <a name="l01962"></a>01962 <span class="keywordflow">return</span> $str; <a name="l01963"></a>01963 <span class="keywordflow">else</span> <a name="l01964"></a>01964 <span class="keywordflow">return</span> substr($str,0,$byte_end); <a name="l01965"></a>01965 } <a name="l01966"></a>01966 <span class="keywordflow">else</span> <span class="keywordflow">return</span> $str; <a name="l01967"></a>01967 } <a name="l01968"></a>01968 <a name="l01978"></a><a class="code" href="classt3lib__cs.html#3901ba21b7a765b9beb8ba094133559b">01978</a> function euc_strlen($str,$charset) { <a name="l01979"></a>01979 $sjis = ($charset == 'shift_jis'); <a name="l01980"></a>01980 $n=0; <a name="l01981"></a>01981 <span class="keywordflow">for</span> ($i=0; strlen($str{$i}); $i++) { <a name="l01982"></a>01982 $c = ord($str{$i}); <a name="l01983"></a>01983 <span class="keywordflow">if</span> ($sjis) { <a name="l01984"></a>01984 <span class="keywordflow">if</span> (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++; <span class="comment">// advance a double-byte char</span> <a 
name="l01985"></a>01985 } <a name="l01986"></a>01986 <span class="keywordflow">else</span> { <a name="l01987"></a>01987 <span class="keywordflow">if</span> ($c >= 0x80) $i++; <span class="comment">// advance a double-byte char</span> <a name="l01988"></a>01988 } <a name="l01989"></a>01989 <a name="l01990"></a>01990 $n++; <a name="l01991"></a>01991 } <a name="l01992"></a>01992 <a name="l01993"></a>01993 <span class="keywordflow">return</span> $n; <a name="l01994"></a>01994 } <a name="l01995"></a>01995 <a name="l02005"></a><a class="code" href="classt3lib__cs.html#a8ea3e301a55d7e9a40d952d4b9e05dd">02005</a> function euc_char2byte_pos($str,$pos,$charset) { <a name="l02006"></a>02006 $sjis = ($charset == 'shift_jis'); <a name="l02007"></a>02007 $n = 0; <span class="comment">// number of characters seen</span> <a name="l02008"></a>02008 $p = abs($pos); <span class="comment">// number of characters wanted</span> <a name="l02009"></a>02009 <a name="l02010"></a>02010 <span class="keywordflow">if</span> ($pos >= 0) { <a name="l02011"></a>02011 $i = 0; <a name="l02012"></a>02012 $d = 1; <a name="l02013"></a>02013 } <span class="keywordflow">else</span> { <a name="l02014"></a>02014 $i = strlen($str)-1; <a name="l02015"></a>02015 $d = -1; <a name="l02016"></a>02016 } <a name="l02017"></a>02017 <a name="l02018"></a>02018 <span class="keywordflow">for</span> ( ; strlen($str{$i}) && $n<$p; $i+=$d) { <a name="l02019"></a>02019 $c = ord($str{$i}); <a name="l02020"></a>02020 <span class="keywordflow">if</span> ($sjis) { <a name="l02021"></a>02021 <span class="keywordflow">if</span> (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i+=$d; <span class="comment">// advance a double-byte char</span> <a name="l02022"></a>02022 } <a name="l02023"></a>02023 <span class="keywordflow">else</span> { <a name="l02024"></a>02024 <span class="keywordflow">if</span> ($c >= 0x80) $i+=$d; <span class="comment">// advance a double-byte char</span> <a name="l02025"></a>02025 } <a name="l02026"></a>02026 <a 
name="l02027"></a>02027 $n++; <a name="l02028"></a>02028 } <a name="l02029"></a>02029 <span class="keywordflow">if</span> (!strlen($str{$i})) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// offset beyond string length</span> <a name="l02030"></a>02030 <a name="l02031"></a>02031 <span class="keywordflow">if</span> ($pos < 0) $i++; <span class="comment">// correct offset</span> <a name="l02032"></a>02032 <a name="l02033"></a>02033 <span class="keywordflow">return</span> $i; <a name="l02034"></a>02034 } <a name="l02035"></a>02035 <a name="l02046"></a><a class="code" href="classt3lib__cs.html#37a9c924fb11389373cbead3f6bbe5a0">02046</a> function euc_char_mapping($str,$charset,$mode,$opt='') { <a name="l02047"></a>02047 <span class="keywordflow">switch</span>($mode) { <a name="l02048"></a>02048 <span class="keywordflow">case</span> '<span class="keywordflow">case</span>': <a name="l02049"></a>02049 <span class="keywordflow">if</span> (!$this->initCaseFolding($charset)) <span class="keywordflow">return</span> $str; <span class="comment">// do nothing</span> <a name="l02050"></a>02050 $map =& $this->caseFolding[$charset][$opt]; <a name="l02051"></a>02051 <span class="keywordflow">break</span>; <a name="l02052"></a>02052 <a name="l02053"></a>02053 <span class="keywordflow">case</span> 'ascii': <a name="l02054"></a>02054 <span class="keywordflow">if</span> (!$this->initToASCII($charset)) <span class="keywordflow">return</span> $str; <span class="comment">// do nothing</span> <a name="l02055"></a>02055 $map =& $this->toASCII[$charset]; <a name="l02056"></a>02056 <span class="keywordflow">break</span>; <a name="l02057"></a>02057 <a name="l02058"></a>02058 <span class="keywordflow">default</span>: <a name="l02059"></a>02059 <span class="keywordflow">return</span> $str; <a name="l02060"></a>02060 } <a name="l02061"></a>02061 <a name="l02062"></a>02062 $sjis = ($charset == 'shift_jis'); <a name="l02063"></a>02063 $out = ''; <a 
name="l02064"></a>02064 <span class="keywordflow">for</span>($i=0; strlen($str{$i}); $i++) { <a name="l02065"></a>02065 $mbc = $str{$i}; <a name="l02066"></a>02066 $c = ord($mbc); <a name="l02067"></a>02067 <a name="l02068"></a>02068 <span class="keywordflow">if</span> ($sjis) { <a name="l02069"></a>02069 <span class="keywordflow">if</span> (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { <span class="comment">// a double-byte char</span> <a name="l02070"></a>02070 $mbc = substr($str,$i,2); <a name="l02071"></a>02071 $i++; <a name="l02072"></a>02072 } <a name="l02073"></a>02073 } <a name="l02074"></a>02074 <span class="keywordflow">else</span> { <a name="l02075"></a>02075 <span class="keywordflow">if</span> ($c >= 0x80) { <span class="comment">// a double-byte char</span> <a name="l02076"></a>02076 $mbc = substr($str,$i,2); <a name="l02077"></a>02077 $i++; <a name="l02078"></a>02078 } <a name="l02079"></a>02079 } <a name="l02080"></a>02080 <a name="l02081"></a>02081 <span class="keywordflow">if</span> (isset($map[$mbc])) { <a name="l02082"></a>02082 $out .= $map[$mbc]; <a name="l02083"></a>02083 } <span class="keywordflow">else</span> { <a name="l02084"></a>02084 $out .= $mbc; <a name="l02085"></a>02085 } <a name="l02086"></a>02086 } <a name="l02087"></a>02087 <a name="l02088"></a>02088 <span class="keywordflow">return</span> $out; <a name="l02089"></a>02089 } <a name="l02090"></a>02090 <a name="l02091"></a>02091 } <a name="l02092"></a>02092 <a name="l02093"></a>02093 <span class="keywordflow">if</span> (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/<span class="keyword">class</span>.<a class="code" href="classt3lib__cs.html">t3lib_cs</a>.php']) { <a name="l02094"></a>02094 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/<span class="keyword">class</span>.<a class="code" href="classt3lib__cs.html">t3lib_cs</a>.php']); <a name="l02095"></a>02095 } <a name="l02096"></a>02096 ?> </pre></div><?php include_once '../doc-typo3-funcs.php'; 
get_footer(); ?>