<?php include_once '../doc-typo3-funcs.php'; $doxygen_vars = array( "title" => "TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/class.external_parser.php Source File", "datetime" => "Sat Dec 2 19:22:32 2006", "date" => "2 Dec 2006", "doxygenversion" => "1.4.6", "projectname" => "TYPO3 4.0.1", "projectnumber" => "4.0.1" ); get_header($doxygen_vars); ?> <!-- Generated by Doxygen 1.4.6 --> <div class="tabs"> <ul> <li><a href="main.html"><span>Main Page</span></a></li> <li><a href="namespaces.html"><span>Namespaces</span></a></li> <li><a href="classes.html"><span>Classes</span></a></li> <li id="current"><a href="files.html"><span>Files</span></a></li> <li><a href="dirs.html"><span>Directories</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="examples.html"><span>Examples</span></a></li> <li> <form action="search.php" method="get"> <table cellspacing="0" cellpadding="0" border="0"> <tr> <td><label> <u>S</u>earch for </label></td> <td><input type="text" name="query" value="" size="20" accesskey="s"/></td> </tr> </table> </form> </li> </ul></div> <div class="nav"> <a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a> » <a class="el" href="dir_18071ae4545d8b3e0364d30c0659c74a.html">typo3</a> » <a class="el" href="dir_57bf1ed8249c1fd5b014486d01bcb27a.html">sysext</a> » <a class="el" href="dir_1144f7dd65e866e7cd4aa66020137172.html">indexed_search</a></div> <h1>class.external_parser.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <?php <a name="l00002"></a>00002 <span class="comment">/***************************************************************</span> <a name="l00003"></a>00003 <span class="comment">* Copyright notice</span> <a name="l00004"></a>00004 <span class="comment">*</span> <a name="l00005"></a>00005 <span class="comment">* (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)</span> <a name="l00006"></a>00006 <span class="comment">* All rights reserved</span> <a name="l00007"></a>00007 <span class="comment">*</span> <a name="l00008"></a>00008 <span class="comment">* This script is part of the TYPO3 project. The TYPO3 project is</span> <a name="l00009"></a>00009 <span class="comment">* free software; you can redistribute it and/or modify</span> <a name="l00010"></a>00010 <span class="comment">* it under the terms of the GNU General Public License as published by</span> <a name="l00011"></a>00011 <span class="comment">* the Free Software Foundation; either version 2 of the License, or</span> <a name="l00012"></a>00012 <span class="comment">* (at your option) any later version.</span> <a name="l00013"></a>00013 <span class="comment">*</span> <a name="l00014"></a>00014 <span class="comment">* The GNU General Public License can be found at</span> <a name="l00015"></a>00015 <span class="comment">* http://www.gnu.org/copyleft/gpl.html.</span> <a name="l00016"></a>00016 <span class="comment">* A copy is found in the textfile GPL.txt and important notices to the license</span> <a name="l00017"></a>00017 <span class="comment">* from the author is found in LICENSE.txt distributed with these scripts.</span> <a name="l00018"></a>00018 <span class="comment">*</span> <a name="l00019"></a>00019 <span class="comment">*</span> <a name="l00020"></a>00020 <span class="comment">* This script is distributed in the hope that it will be useful,</span> <a name="l00021"></a>00021 <span class="comment">* but WITHOUT ANY WARRANTY; without even the implied warranty of</span> <a name="l00022"></a>00022 <span class="comment">* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the</span> <a name="l00023"></a>00023 <span class="comment">* GNU General Public License for more details.</span> <a name="l00024"></a>00024 <span class="comment">*</span> <a name="l00025"></a>00025 <span class="comment">* This copyright notice MUST APPEAR in all copies of the script!</span> <a name="l00026"></a>00026 <span class="comment">***************************************************************/</span> <a name="l00075"></a><a class="code" href="classtx__indexed__search__extparse.html">00075</a> <span class="keyword">class </span><a class="code" href="classtx__indexed__search__extparse.html">tx_indexed_search_extparse</a> { <a name="l00076"></a>00076 <a name="l00077"></a>00077 <span class="comment">// This value is also overridden from config.</span> <a name="l00078"></a><a class="code" href="classtx__indexed__search__extparse.html#483c9c6b542bda541591959eff79aa7f">00078</a> var <a class="code" href="classtx__indexed__search__extparse.html#483c9c6b542bda541591959eff79aa7f">$pdf_mode</a> = -20; <span class="comment">// zero: whole PDF file is indexed in one. positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,.... Negative integer would indicate (abs value) number of groups. Eg "3" groups of 10 pages would be 1-4,5-8,9-10</span> <a name="l00079"></a>00079 <a name="l00080"></a>00080 <span class="comment">// This array is configured in initialization:</span> <a name="l00081"></a><a class="code" href="classtx__indexed__search__extparse.html#6ccbe78d9365a6ee9f26bb912588f804">00081</a> var <a class="code" href="classtx__indexed__search__extparse.html#6ccbe78d9365a6ee9f26bb912588f804">$app</a> = array(); <a name="l00082"></a><a class="code" href="classtx__indexed__search__extparse.html#9f8cfdeeb28969198a9c77cf9b6737db">00082</a> var <a class="code" href="classtx__indexed__search__extparse.html#9f8cfdeeb28969198a9c77cf9b6737db">$ext2itemtype_map</a> = array(); <a name="l00083"></a><a class="code" href="classtx__indexed__search__extparse.html#c78b29692431ee5370e92344a5a0ca2d">00083</a> var <a class="code" href="classtx__indexed__search__extparse.html#c78b29692431ee5370e92344a5a0ca2d">$supportedExtensions</a> = array(); <a name="l00084"></a>00084 <a name="l00085"></a><a class="code" href="classtx__indexed__search__extparse.html#a6d3172574bef5eefdba59078293f546">00085</a> var <a class="code" href="classtx__indexed__search__extparse.html#a6d3172574bef5eefdba59078293f546">$pObj</a>; <span class="comment">// Reference to parent object (indexer class)</span> <a name="l00086"></a>00086 <a name="l00087"></a>00087 <a name="l00094"></a><a class="code" href="classtx__indexed__search__extparse.html#03b11751d3b7c7932c9636099380e22f">00094</a> function <a class="code" href="classtx__indexed__search__extparse.html#03b11751d3b7c7932c9636099380e22f">initParser</a>($extension) { <a name="l00095"></a>00095 <a name="l00096"></a>00096 <span class="comment">// Then read indexer-config and set if appropriate:</span> <a name="l00097"></a>00097 $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); <a name="l00098"></a>00098 <a name="l00099"></a>00099 <span class="comment">// If windows, apply extension to tool name:</span> <a name="l00100"></a>00100 $exe = (TYPO3_OS == 'WIN') ? '.exe' : ''; <span class="comment">// lg</span> <a name="l00101"></a>00101 $extOK = FALSE; <a name="l00102"></a>00102 $mainExtension = ''; <a name="l00103"></a>00103 <a name="l00104"></a>00104 <span class="comment">// Ignore extensions</span> <a name="l00105"></a>00105 $ignoreExtensions = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">','</span>, strtolower($indexerConfig['ignoreExtensions']),1); <a name="l00106"></a>00106 <span class="keywordflow">if</span> (in_array($extension, $ignoreExtensions)) { <a name="l00107"></a>00107 $this->pObj->log_setTSlogMessage('Extension <span class="stringliteral">"'.$extension.'"</span> was set to be ignored.',1); <a name="l00108"></a>00108 <span class="keywordflow">return</span> FALSE; <a name="l00109"></a>00109 } <a name="l00110"></a>00110 <a name="l00111"></a>00111 <span class="comment">// Switch on file extension:</span> <a name="l00112"></a>00112 <span class="keywordflow">switch</span>($extension) { <a name="l00113"></a>00113 <span class="keywordflow">case</span> 'pdf': <a name="l00114"></a>00114 <span class="comment">// PDF</span> <a name="l00115"></a>00115 <span class="keywordflow">if</span> ($indexerConfig['pdftools']) { <a name="l00116"></a>00116 $pdfPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['pdftools']).<span class="charliteral">'/'</span>; <a name="l00117"></a>00117 <span class="keywordflow">if</span> (ini_get('safe_mode') || (@is_file($pdfPath.'pdftotext'.$exe) && @is_file($pdfPath.'pdfinfo'.$exe))) { <a name="l00118"></a>00118 $this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe; <a name="l00119"></a>00119 $this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe; <a name="l00120"></a>00120 <span class="comment">// PDF mode:</span> <a name="l00121"></a>00121 $this->pdf_mode = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($indexerConfig['pdf_mode'],-100,100); <a name="l00122"></a>00122 $extOK = TRUE; <a name="l00123"></a>00123 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"PDF tools was not found in paths '"</span>.$pdfPath.<span class="stringliteral">"pdftotext' and/or '"</span>.$pdfPath.<span class="stringliteral">"pdfinfo'"</span>,3); <a name="l00124"></a>00124 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('PDF tools disabled',1); <a name="l00125"></a>00125 <span class="keywordflow">break</span>; <a name="l00126"></a>00126 <span class="keywordflow">case</span> 'doc': <a name="l00127"></a>00127 <span class="comment">// Catdoc</span> <a name="l00128"></a>00128 <span class="keywordflow">if</span> ($indexerConfig['catdoc']) { <a name="l00129"></a>00129 $catdocPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['catdoc']).<span class="charliteral">'/'</span>; <a name="l00130"></a>00130 <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($catdocPath.'catdoc'.$exe)) { <a name="l00131"></a>00131 $this->app['catdoc'] = $catdocPath.'catdoc'.$exe; <a name="l00132"></a>00132 $extOK = TRUE; <a name="l00133"></a>00133 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"'catdoc' tool for reading Word-files was not found in path '"</span>.$catdocPath.<span class="stringliteral">"catdoc'"</span>,3); <a name="l00134"></a>00134 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('catdoc tools (Word-files) disabled',1); <a name="l00135"></a>00135 <span class="keywordflow">break</span>; <a name="l00136"></a>00136 <span class="keywordflow">case</span> 'pps': <span class="comment">// MS PowerPoint(?)</span> <a name="l00137"></a>00137 <span class="keywordflow">case</span> 'ppt': <span class="comment">// MS PowerPoint</span> <a name="l00138"></a>00138 <span class="comment">// ppthtml</span> <a name="l00139"></a>00139 <span class="keywordflow">if</span> ($indexerConfig['ppthtml']) { <a name="l00140"></a>00140 $ppthtmlPath = ereg_replace('\/$<span class="charliteral">','</span>',$indexerConfig['ppthtml']).<span class="charliteral">'/'</span>; <a name="l00141"></a>00141 <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($ppthtmlPath.'ppthtml'.$exe)){ <a name="l00142"></a>00142 $this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe; <a name="l00143"></a>00143 $extOK = TRUE; <a name="l00144"></a>00144 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"'ppthtml' tool for reading Powerpoint-files was not found in path '"</span>.$ppthtmlPath.<span class="stringliteral">"ppthtml'"</span>,3); <a name="l00145"></a>00145 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('ppthtml tools (Powerpoint-files) disabled',1); <a name="l00146"></a>00146 <span class="keywordflow">break</span>; <a name="l00147"></a>00147 <span class="keywordflow">case</span> 'xls': <span class="comment">// MS Excel</span> <a name="l00148"></a>00148 <span class="comment">// Xlhtml</span> <a name="l00149"></a>00149 <span class="keywordflow">if</span> ($indexerConfig['xlhtml']) { <a name="l00150"></a>00150 $xlhtmlPath = ereg_replace('\/$<span class="charliteral">','</span>',$indexerConfig['xlhtml']).<span class="charliteral">'/'</span>; <a name="l00151"></a>00151 <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($xlhtmlPath.'xlhtml'.$exe)){ <a name="l00152"></a>00152 $this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe; <a name="l00153"></a>00153 $extOK = TRUE; <a name="l00154"></a>00154 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"'xlhtml' tool for reading Excel-files was not found in path '"</span>.$xlhtmlPath.<span class="stringliteral">"xlhtml'"</span>,3); <a name="l00155"></a>00155 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('xlhtml tools (Excel-files) disabled',1); <a name="l00156"></a>00156 <span class="keywordflow">break</span>; <a name="l00157"></a>00157 <span class="keywordflow">case</span> 'sxc': <span class="comment">// Open Office Calc.</span> <a name="l00158"></a>00158 <span class="keywordflow">case</span> 'sxi': <span class="comment">// Open Office Impress</span> <a name="l00159"></a>00159 <span class="keywordflow">case</span> 'sxw': <span class="comment">// Open Office Writer</span> <a name="l00160"></a>00160 <span class="keywordflow">case</span> 'ods': <span class="comment">// Oasis OpenDocument Spreadsheet</span> <a name="l00161"></a>00161 <span class="keywordflow">case</span> 'odp': <span class="comment">// Oasis OpenDocument Presentation</span> <a name="l00162"></a>00162 <span class="keywordflow">case</span> 'odt': <span class="comment">// Oasis OpenDocument Text</span> <a name="l00163"></a>00163 <span class="keywordflow">if</span> ($indexerConfig['unzip']) { <a name="l00164"></a>00164 $unzipPath = preg_replace('/\/$/<span class="charliteral">','</span>',$indexerConfig['unzip']).<span class="charliteral">'/'</span>; <a name="l00165"></a>00165 <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($unzipPath.'unzip'.$exe)) { <a name="l00166"></a>00166 $this->app['unzip'] = $unzipPath.'unzip'.$exe; <a name="l00167"></a>00167 $extOK = TRUE; <a name="l00168"></a>00168 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"'unzip' tool for reading OpenOffice.org-files was not found in path '"</span>.$unzipPath.<span class="stringliteral">"unzip'"</span>,3); <a name="l00169"></a>00169 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('unzip tool (OpenOffice.org-files) disabled',1); <a name="l00170"></a>00170 <span class="keywordflow">break</span>; <a name="l00171"></a>00171 <span class="keywordflow">case</span> 'rtf': <a name="l00172"></a>00172 <span class="comment">// Catdoc</span> <a name="l00173"></a>00173 <span class="keywordflow">if</span> ($indexerConfig['unrtf']) { <a name="l00174"></a>00174 $unrtfPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['unrtf']).<span class="charliteral">'/'</span>; <a name="l00175"></a>00175 <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($unrtfPath.'unrtf'.$exe)) { <a name="l00176"></a>00176 $this->app['unrtf'] = $unrtfPath.'unrtf'.$exe; <a name="l00177"></a>00177 $extOK = TRUE; <a name="l00178"></a>00178 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage(<span class="stringliteral">"'unrtf' tool for reading RTF-files was not found in path '"</span>.$unrtfPath.<span class="stringliteral">"unrtf'"</span>,3); <a name="l00179"></a>00179 } <span class="keywordflow">else</span> $this->pObj->log_setTSlogMessage('unrtf tool (RTF-files) disabled',1); <a name="l00180"></a>00180 <span class="keywordflow">break</span>; <a name="l00181"></a>00181 <span class="keywordflow">case</span> 'txt': <span class="comment">// Raw text</span> <a name="l00182"></a>00182 <span class="keywordflow">case</span> 'csv': <span class="comment">// Raw text</span> <a name="l00183"></a>00183 <span class="keywordflow">case</span> 'xml': <span class="comment">// PHP strip-tags()</span> <a name="l00184"></a>00184 <span class="keywordflow">case</span> 'tif': <span class="comment">// PHP EXIF</span> <a name="l00185"></a>00185 $extOK = TRUE; <a name="l00186"></a>00186 <span class="keywordflow">break</span>; <a name="l00187"></a>00187 <span class="keywordflow">case</span> 'html': <span class="comment">// PHP strip-tags()</span> <a name="l00188"></a>00188 <span class="keywordflow">case</span> 'htm': <span class="comment">// PHP strip-tags()</span> <a name="l00189"></a>00189 $extOK = TRUE; <a name="l00190"></a>00190 $mainExtension = 'html'; <span class="comment">// making "html" the common "item_type"</span> <a name="l00191"></a>00191 <span class="keywordflow">break</span>; <a name="l00192"></a>00192 <span class="keywordflow">case</span> 'jpg': <span class="comment">// PHP EXIF</span> <a name="l00193"></a>00193 <span class="keywordflow">case</span> 'jpeg': <span class="comment">// PHP EXIF</span> <a name="l00194"></a>00194 $extOK = TRUE; <a name="l00195"></a>00195 $mainExtension = 'jpeg'; <span class="comment">// making "jpeg" the common item_type</span> <a name="l00196"></a>00196 <span class="keywordflow">break</span>; <a name="l00197"></a>00197 } <a name="l00198"></a>00198 <a name="l00199"></a>00199 <span class="comment">// If extension was OK:</span> <a name="l00200"></a>00200 <span class="keywordflow">if</span> ($extOK) { <a name="l00201"></a>00201 $this->supportedExtensions[$extension] = TRUE; <a name="l00202"></a>00202 $this->ext2itemtype_map[$extension] = $mainExtension ? $mainExtension : $extension; <a name="l00203"></a>00203 <span class="keywordflow">return</span> TRUE; <a name="l00204"></a>00204 } <a name="l00205"></a>00205 } <a name="l00206"></a>00206 <a name="l00214"></a><a class="code" href="classtx__indexed__search__extparse.html#6c7999dfdc13d3188d63085fb7c6c4ea">00214</a> function <a class="code" href="classtx__indexed__search__extparse.html#6c7999dfdc13d3188d63085fb7c6c4ea">softInit</a>($extension) { <a name="l00215"></a>00215 <span class="keywordflow">switch</span>($extension) { <a name="l00216"></a>00216 <span class="keywordflow">case</span> 'pdf': <span class="comment">// PDF</span> <a name="l00217"></a>00217 <span class="keywordflow">case</span> 'doc': <span class="comment">// MS Word files</span> <a name="l00218"></a>00218 <span class="keywordflow">case</span> 'pps': <span class="comment">// MS PowerPoint</span> <a name="l00219"></a>00219 <span class="keywordflow">case</span> 'ppt': <span class="comment">// MS PowerPoint</span> <a name="l00220"></a>00220 <span class="keywordflow">case</span> 'xls': <span class="comment">// MS Excel</span> <a name="l00221"></a>00221 <span class="keywordflow">case</span> 'sxc': <span class="comment">// Open Office Calc.</span> <a name="l00222"></a>00222 <span class="keywordflow">case</span> 'sxi': <span class="comment">// Open Office Impress</span> <a name="l00223"></a>00223 <span class="keywordflow">case</span> 'sxw': <span class="comment">// Open Office Writer</span> <a name="l00224"></a>00224 <span class="keywordflow">case</span> 'ods': <span class="comment">// Oasis OpenDocument Spreadsheet</span> <a name="l00225"></a>00225 <span class="keywordflow">case</span> 'odp': <span class="comment">// Oasis OpenDocument Presentation</span> <a name="l00226"></a>00226 <span class="keywordflow">case</span> 'odt': <span class="comment">// Oasis OpenDocument Text</span> <a name="l00227"></a>00227 <span class="keywordflow">case</span> 'rtf': <span class="comment">// RTF documents</span> <a name="l00228"></a>00228 <span class="keywordflow">case</span> 'txt': <span class="comment">// ASCII Text documents</span> <a name="l00229"></a>00229 <span class="keywordflow">case</span> 'html': <span class="comment">// HTML</span> <a name="l00230"></a>00230 <span class="keywordflow">case</span> 'htm': <span class="comment">// HTML</span> <a name="l00231"></a>00231 <span class="keywordflow">case</span> 'csv': <span class="comment">// Comma Separated Values</span> <a name="l00232"></a>00232 <span class="keywordflow">case</span> 'xml': <span class="comment">// Generic XML</span> <a name="l00233"></a>00233 <span class="keywordflow">case</span> 'jpg': <span class="comment">// Jpeg images (EXIF comment)</span> <a name="l00234"></a>00234 <span class="keywordflow">case</span> 'jpeg': <span class="comment">// Jpeg images (EXIF comment)</span> <a name="l00235"></a>00235 <span class="keywordflow">case</span> 'tif': <span class="comment">// TIF images (EXIF comment)</span> <a name="l00236"></a>00236 <span class="keywordflow">return</span> TRUE; <a name="l00237"></a>00237 <span class="keywordflow">break</span>; <a name="l00238"></a>00238 } <a name="l00239"></a>00239 } <a name="l00240"></a>00240 <a name="l00247"></a><a class="code" href="classtx__indexed__search__extparse.html#b6e93a955bf2f737529d1e615d327712">00247</a> function <a class="code" href="classtx__indexed__search__extparse.html#b6e93a955bf2f737529d1e615d327712">searchTypeMediaTitle</a>($extension) { <a name="l00248"></a>00248 <a name="l00249"></a>00249 <span class="comment">// Read indexer-config</span> <a name="l00250"></a>00250 $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); <a name="l00251"></a>00251 <a name="l00252"></a>00252 <span class="comment">// Ignore extensions</span> <a name="l00253"></a>00253 $ignoreExtensions = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">','</span>, strtolower($indexerConfig['ignoreExtensions']),1); <a name="l00254"></a>00254 <span class="keywordflow">if</span> (in_array($extension, $ignoreExtensions)) { <a name="l00255"></a>00255 <span class="keywordflow">return</span> FALSE; <a name="l00256"></a>00256 } <a name="l00257"></a>00257 <a name="l00258"></a>00258 <span class="comment">// Switch on file extension:</span> <a name="l00259"></a>00259 <span class="keywordflow">switch</span>($extension) { <a name="l00260"></a>00260 <span class="keywordflow">case</span> 'pdf': <a name="l00261"></a>00261 <span class="comment">// PDF</span> <a name="l00262"></a>00262 <span class="keywordflow">if</span> ($indexerConfig['pdftools']) { <a name="l00263"></a>00263 <span class="keywordflow">return</span> 'PDF'; <a name="l00264"></a>00264 } <a name="l00265"></a>00265 <span class="keywordflow">break</span>; <a name="l00266"></a>00266 <span class="keywordflow">case</span> 'doc': <a name="l00267"></a>00267 <span class="comment">// Catdoc</span> <a name="l00268"></a>00268 <span class="keywordflow">if</span> ($indexerConfig['catdoc']) { <a name="l00269"></a>00269 <span class="keywordflow">return</span> 'MS Word'; <a name="l00270"></a>00270 } <a name="l00271"></a>00271 <span class="keywordflow">break</span>; <a name="l00272"></a>00272 <span class="keywordflow">case</span> 'pps': <span class="comment">// MS PowerPoint(?)</span> <a name="l00273"></a>00273 <span class="keywordflow">case</span> 'ppt': <span class="comment">// MS PowerPoint</span> <a name="l00274"></a>00274 <span class="comment">// ppthtml</span> <a name="l00275"></a>00275 <span class="keywordflow">if</span> ($indexerConfig['ppthtml']) { <a name="l00276"></a>00276 <span class="keywordflow">return</span> 'MS Powerpoint'; <a name="l00277"></a>00277 } <a name="l00278"></a>00278 <span class="keywordflow">break</span>; <a name="l00279"></a>00279 <span class="keywordflow">case</span> 'xls': <span class="comment">// MS Excel</span> <a name="l00280"></a>00280 <span class="comment">// Xlhtml</span> <a name="l00281"></a>00281 <span class="keywordflow">if</span> ($indexerConfig['xlhtml']) { <a name="l00282"></a>00282 <span class="keywordflow">return</span> 'MS Excel'; <a name="l00283"></a>00283 } <a name="l00284"></a>00284 <span class="keywordflow">break</span>; <a name="l00285"></a>00285 <span class="keywordflow">case</span> 'sxc': <span class="comment">// Open Office Calc.</span> <a name="l00286"></a>00286 <span class="keywordflow">case</span> 'sxi': <span class="comment">// Open Office Impress</span> <a name="l00287"></a>00287 <span class="keywordflow">case</span> 'sxw': <span class="comment">// Open Office Writer</span> <a name="l00288"></a>00288 <span class="keywordflow">case</span> 'ods': <span class="comment">// Oasis OpenDocument Spreadsheet</span> <a name="l00289"></a>00289 <span class="keywordflow">case</span> 'odp': <span class="comment">// Oasis OpenDocument Presentation</span> <a name="l00290"></a>00290 <span class="keywordflow">case</span> 'odt': <span class="comment">// Oasis OpenDocument Text</span> <a name="l00291"></a>00291 <span class="keywordflow">if</span> ($indexerConfig['unzip']) { <a name="l00292"></a>00292 <span class="keywordflow">return</span> 'Open Office'; <a name="l00293"></a>00293 } <a name="l00294"></a>00294 <span class="keywordflow">break</span>; <a name="l00295"></a>00295 <span class="keywordflow">case</span> 'rtf': <a name="l00296"></a>00296 <span class="comment">// Catdoc</span> <a name="l00297"></a>00297 <span class="keywordflow">if</span> ($indexerConfig['unrtf']) { <a name="l00298"></a>00298 <span class="keywordflow">return</span> 'RTF'; <a name="l00299"></a>00299 } <a name="l00300"></a>00300 <span class="keywordflow">break</span>; <a name="l00301"></a>00301 <span class="keywordflow">case</span> 'html': <span class="comment">// PHP strip-tags()</span> <a name="l00302"></a>00302 <span class="keywordflow">case</span> 'jpeg': <span class="comment">// PHP EXIF</span> <a name="l00303"></a>00303 <span class="keywordflow">case</span> 'txt': <span class="comment">// Raw text</span> <a name="l00304"></a>00304 <span class="keywordflow">case</span> 'csv': <span class="comment">// Raw text</span> <a name="l00305"></a>00305 <span class="keywordflow">case</span> 'xml': <span class="comment">// PHP strip-tags()</span> <a name="l00306"></a>00306 <span class="keywordflow">case</span> 'tif': <span class="comment">// PHP EXIF</span> <a name="l00307"></a>00307 <span class="keywordflow">return</span> strtoupper($extension); <a name="l00308"></a>00308 <span class="keywordflow">break</span>; <a name="l00309"></a>00309 <span class="comment">// NO entry (duplicates or blank):</span> <a name="l00310"></a>00310 <span class="keywordflow">case</span> 'htm': <span class="comment">// PHP strip-tags()</span> <a name="l00311"></a>00311 <span class="keywordflow">case</span> 'jpg': <span class="comment">// PHP EXIF</span> <a name="l00312"></a>00312 <span class="keywordflow">default</span>: <a name="l00313"></a>00313 <span class="keywordflow">break</span>; <a name="l00314"></a>00314 } <a name="l00315"></a>00315 } <a name="l00316"></a>00316 <a name="l00323"></a><a class="code" href="classtx__indexed__search__extparse.html#423bc2a451130a37ac96b89f7eae7605">00323</a> function <a class="code" href="classtx__indexed__search__extparse.html#423bc2a451130a37ac96b89f7eae7605">isMultiplePageExtension</a>($extension) { <a name="l00324"></a>00324 <span class="comment">// Switch on file extension:</span> <a name="l00325"></a>00325 <span class="keywordflow">switch</span>((string)$extension) { <a name="l00326"></a>00326 <span class="keywordflow">case</span> 'pdf': <a name="l00327"></a>00327 <span class="keywordflow">return</span> TRUE; <a name="l00328"></a>00328 <span class="keywordflow">break</span>; <a name="l00329"></a>00329 } <a name="l00330"></a>00330 } <a name="l00331"></a>00331 <a name="l00332"></a>00332 <a name="l00333"></a>00333 <a name="l00334"></a>00334 <a name="l00335"></a>00335 <a name="l00336"></a>00336 <a name="l00337"></a>00337 <a name="l00338"></a>00338 <a name="l00339"></a>00339 <a name="l00340"></a>00340 <span class="comment">/************************</span> <a name="l00341"></a>00341 <span class="comment"> *</span> <a name="l00342"></a>00342 <span class="comment"> * Reading documents (for parsing)</span> <a name="l00343"></a>00343 <span class="comment"> *</span> <a name="l00344"></a>00344 <span class="comment"> ************************/</span> <a name="l00345"></a>00345 <a name="l00354"></a><a class="code" href="classtx__indexed__search__extparse.html#ecd64ada0ac29b1ae5079df31cddaaa1">00354</a> function <a class="code" href="classtx__indexed__search__extparse.html#ecd64ada0ac29b1ae5079df31cddaaa1">readFileContent</a>($ext,$absFile,$cPKey) { <a name="l00355"></a>00355 unset($contentArr); <a name="l00356"></a>00356 <a name="l00357"></a>00357 <span class="comment">// Return immediately if initialization didn't set support up:</span> <a name="l00358"></a>00358 <span class="keywordflow">if</span> (!$this->supportedExtensions[$ext]) <span class="keywordflow">return</span> FALSE; <a name="l00359"></a>00359 <a name="l00360"></a>00360 <span class="comment">// Switch by file extension</span> <a name="l00361"></a>00361 <span class="keywordflow">switch</span> ($ext) { <a name="l00362"></a>00362 <span class="keywordflow">case</span> 'pdf': <a name="l00363"></a>00363 <span class="keywordflow">if</span> ($this->app['pdfinfo']) { <a name="l00364"></a>00364 <span class="comment">// Getting pdf-info:</span> <a name="l00365"></a>00365 $cmd = $this->app['pdfinfo'].' <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00366"></a>00366 exec($cmd,$res); <a name="l00367"></a>00367 $pdfInfo = $this-><a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">splitPdfInfo</a>($res); <a name="l00368"></a>00368 <span class="keywordflow">if</span> (intval($pdfInfo['pages'])) { <a name="l00369"></a>00369 list($low,$high) = explode(<span class="charliteral">'-'</span>,$cPKey); <a name="l00370"></a>00370 <a name="l00371"></a>00371 <span class="comment">// Get pdf content:</span> <a name="l00372"></a>00372 $tempFileName = <a class="code" href="classt3lib__div.html#e126c1b5d0f72003e39c2930d5f65f07">t3lib_div::tempnam</a>('Typo3_indexer'); <span class="comment">// Create temporary name</span> <a name="l00373"></a>00373 @unlink ($tempFileName); <span class="comment">// Delete if exists, just to be safe.</span> <a name="l00374"></a>00374 $cmd = $this->app['pdftotext'].' -f '.$low.' -l '.$high.' -enc UTF-8 -q <span class="stringliteral">"'.$absFile.'"</span> '.$tempFileName; <a name="l00375"></a>00375 exec($cmd,$res); <a name="l00376"></a>00376 <span class="keywordflow">if</span> (@is_file($tempFileName)) { <a name="l00377"></a>00377 $content = t3lib_div::getUrl($tempFileName); <a name="l00378"></a>00378 unlink($tempFileName); <a name="l00379"></a>00379 } <span class="keywordflow">else</span> { <a name="l00380"></a>00380 $this->pObj->log_setTSlogMessage('PDFtoText Failed on <span class="keyword">this</span> document: '.$absFile.<span class="stringliteral">". Maybe the PDF file is locked for printing or encrypted."</span>,2); <a name="l00381"></a>00381 } <a name="l00382"></a>00382 <span class="keywordflow">if</span> (strlen($content)) { <a name="l00383"></a>00383 $contentArr = $this->pObj->splitRegularContent($this-><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content)); <a name="l00384"></a>00384 } <a name="l00385"></a>00385 } <a name="l00386"></a>00386 } <a name="l00387"></a>00387 <span class="keywordflow">break</span>; <a name="l00388"></a>00388 <span class="keywordflow">case</span> 'doc': <a name="l00389"></a>00389 <span class="keywordflow">if</span> ($this->app['catdoc']) { <a name="l00390"></a>00390 $cmd = $this->app['catdoc'].' -d utf-8 <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00391"></a>00391 exec($cmd,$res); <a name="l00392"></a>00392 $content = implode(chr(10),$res); <a name="l00393"></a>00393 $contentArr = $this->pObj->splitRegularContent($this-><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content)); <a name="l00394"></a>00394 } <a name="l00395"></a>00395 <span class="keywordflow">break</span>; <a name="l00396"></a>00396 <span class="keywordflow">case</span> 'pps': <a name="l00397"></a>00397 <span class="keywordflow">case</span> 'ppt': <a name="l00398"></a>00398 <span class="keywordflow">if</span> ($this->app['ppthtml']) { <a name="l00399"></a>00399 $cmd = $this->app['ppthtml'].' <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00400"></a>00400 exec($cmd,$res); <a name="l00401"></a>00401 $content = implode(chr(10),$res); <a name="l00402"></a>00402 $content = $this->pObj->convertHTMLToUtf8($content); <a name="l00403"></a>00403 $contentArr = $this->pObj->splitHTMLContent($this-><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content)); <a name="l00404"></a>00404 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00405"></a>00405 } <a name="l00406"></a>00406 <span class="keywordflow">break</span>; <a name="l00407"></a>00407 <span class="keywordflow">case</span> 'xls': <a name="l00408"></a>00408 <span class="keywordflow">if</span> ($this->app['xlhtml']) { <a name="l00409"></a>00409 $cmd = $this->app['xlhtml'].' -nc -te <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00410"></a>00410 exec($cmd,$res); <a name="l00411"></a>00411 $content = implode(chr(10),$res); <a name="l00412"></a>00412 $content = $this->pObj->convertHTMLToUtf8($content); <a name="l00413"></a>00413 $contentArr = $this->pObj->splitHTMLContent($this-><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content)); <a name="l00414"></a>00414 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00415"></a>00415 } <a name="l00416"></a>00416 <span class="keywordflow">break</span>; <a name="l00417"></a>00417 <span class="keywordflow">case</span> 'sxi': <a name="l00418"></a>00418 <span class="keywordflow">case</span> 'sxc': <a name="l00419"></a>00419 <span class="keywordflow">case</span> 'sxw': <a name="l00420"></a>00420 <span class="keywordflow">case</span> 'ods': <a name="l00421"></a>00421 <span class="keywordflow">case</span> 'odp': <a name="l00422"></a>00422 <span class="keywordflow">case</span> 'odt': <a name="l00423"></a>00423 <span class="keywordflow">if</span> ($this->app['unzip']) { <a name="l00424"></a>00424 <span class="comment">// Read content.xml:</span> <a name="l00425"></a>00425 $cmd = $this->app['unzip'].' -p '.$absFile.' content.xml'; <a name="l00426"></a>00426 exec($cmd,$out); <a name="l00427"></a>00427 $content_xml = implode(chr(10),$out); <a name="l00428"></a>00428 <a name="l00429"></a>00429 <span class="comment">// Read meta.xml:</span> <a name="l00430"></a>00430 $cmd = $this->app['unzip'].' -p '.$absFile.' meta.xml'; <a name="l00431"></a>00431 exec($cmd, $out); <a name="l00432"></a>00432 $meta_xml = implode(chr(10),$out); <a name="l00433"></a>00433 <a name="l00434"></a>00434 $utf8_content = trim(strip_tags(str_replace(<span class="charliteral">'<'</span>,' <',$content_xml))); <a name="l00435"></a>00435 $contentArr = $this->pObj->splitRegularContent($utf8_content); <a name="l00436"></a>00436 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00437"></a>00437 <a name="l00438"></a>00438 <span class="comment">// Meta information</span> <a name="l00439"></a>00439 $metaContent = <a class="code" href="classt3lib__div.html#459ef7b829f164c32f9ec1b8a02f7774">t3lib_div::xml2tree</a>($meta_xml); <a name="l00440"></a>00440 $metaContent = $metaContent['office:document-meta'][0]['ch']['office:meta'][0]['ch']; <a name="l00441"></a>00441 <span class="keywordflow">if</span> (is_array($metaContent)) { <a name="l00442"></a>00442 $contentArr['title'] = $metaContent['dc:title'][0]['values'][0] ? $metaContent['dc:title'][0]['values'][0] : $contentArr['title']; <a name="l00443"></a>00443 $contentArr['description'] = $metaContent['dc:subject'][0]['values'][0].<span class="charliteral">' '</span>.$metaContent['dc:description'][0]['values'][0]; <a name="l00444"></a>00444 <a name="l00445"></a>00445 <span class="comment">// Keywords collected:</span> <a name="l00446"></a>00446 <span class="keywordflow">if</span> (is_array($metaContent['meta:keywords'][0]['ch']['meta:keyword'])) { <a name="l00447"></a>00447 foreach ($metaContent['meta:keywords'][0]['ch']['meta:keyword'] as $kwDat) { <a name="l00448"></a>00448 $contentArr['keywords'].= $kwDat['values'][0].<span class="charliteral">' '</span>; <a name="l00449"></a>00449 } <a name="l00450"></a>00450 } <a name="l00451"></a>00451 } <a name="l00452"></a>00452 } <a name="l00453"></a>00453 <span class="keywordflow">break</span>; <a name="l00454"></a>00454 <span class="keywordflow">case</span> 'rtf': <a name="l00455"></a>00455 <span class="keywordflow">if</span> ($this->app['unrtf']) { <a name="l00456"></a>00456 $cmd = $this->app['unrtf'].' <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00457"></a>00457 exec($cmd,$res); <a name="l00458"></a>00458 $fileContent = implode(chr(10),$res); <a name="l00459"></a>00459 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent); <a name="l00460"></a>00460 $contentArr = $this->pObj->splitHTMLContent($fileContent); <a name="l00461"></a>00461 } <a name="l00462"></a>00462 <span class="keywordflow">break</span>; <a name="l00463"></a>00463 <span class="keywordflow">case</span> 'txt': <a name="l00464"></a>00464 <span class="keywordflow">case</span> 'csv': <span class="comment">// Raw text</span> <a name="l00465"></a>00465 $content = t3lib_div::getUrl($absFile); <a name="l00466"></a>00466 <span class="comment">// TODO: Auto-registration of charset???? -> utf-8 (Current assuming western europe...)</span> <a name="l00467"></a>00467 $content = $this->pObj->convertHTMLToUtf8($content, 'iso-8859-1'); <a name="l00468"></a>00468 $contentArr = $this->pObj->splitRegularContent($content); <a name="l00469"></a>00469 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00470"></a>00470 <span class="keywordflow">break</span>; <a name="l00471"></a>00471 <span class="keywordflow">case</span> 'html': <a name="l00472"></a>00472 <span class="keywordflow">case</span> 'htm': <a name="l00473"></a>00473 $fileContent = t3lib_div::getUrl($absFile); <a name="l00474"></a>00474 $fileContent = $this->pObj->convertHTMLToUtf8($fileContent); <a name="l00475"></a>00475 $contentArr = $this->pObj->splitHTMLContent($fileContent); <a name="l00476"></a>00476 <span class="keywordflow">break</span>; <a name="l00477"></a>00477 <span class="keywordflow">case</span> 'xml': <span class="comment">// PHP strip-tags()</span> <a name="l00478"></a>00478 $fileContent = t3lib_div::getUrl($absFile); <a name="l00479"></a>00479 <a name="l00480"></a>00480 <span class="comment">// Finding charset:</span> <a name="l00481"></a>00481 eregi('^[[:space:]]*<\?xml[^>]+encoding[[:space:]]*=[[:space:]]*[<span class="stringliteral">"\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["</span>\<span class="charliteral">']'</span>,substr($fileContent,0,200),$reg); <a name="l00482"></a>00482 $charset = $reg[1] ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8'; <a name="l00483"></a>00483 <a name="l00484"></a>00484 <span class="comment">// Converting content:</span> <a name="l00485"></a>00485 $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace(<span class="charliteral">'<'</span>,' <',$fileContent)), $charset); <a name="l00486"></a>00486 $contentArr = $this->pObj->splitRegularContent($fileContent); <a name="l00487"></a>00487 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00488"></a>00488 <span class="keywordflow">break</span>; <a name="l00489"></a>00489 <span class="keywordflow">case</span> 'jpg': <span class="comment">// PHP EXIF</span> <a name="l00490"></a>00490 <span class="keywordflow">case</span> 'jpeg': <span class="comment">// PHP EXIF</span> <a name="l00491"></a>00491 <span class="keywordflow">case</span> 'tif': <span class="comment">// PHP EXIF</span> <a name="l00492"></a>00492 $exif = exif_read_data($absFile, 'IFD0'); <a name="l00493"></a>00493 <span class="keywordflow">if</span> ($exif) { <a name="l00494"></a>00494 $comment = trim($exif['COMMENT'][0].<span class="charliteral">' '</span>.$exif['ImageDescription']); <span class="comment">// The comments in JPEG files are utf-8, while in Tif files they are 7-bit ascii.</span> <a name="l00495"></a>00495 } <span class="keywordflow">else</span> { <a name="l00496"></a>00496 $comment = ''; <a name="l00497"></a>00497 } <a name="l00498"></a>00498 $contentArr = $this->pObj->splitRegularContent($comment); <a name="l00499"></a>00499 $contentArr['title'] = basename($absFile); <span class="comment">// Make sure the title doesn't expose the absolute path!</span> <a name="l00500"></a>00500 <span class="keywordflow">break</span>; <a name="l00501"></a>00501 <span class="keywordflow">default</span>: <a name="l00502"></a>00502 <span class="keywordflow">return</span> <span class="keyword">false</span>; <a name="l00503"></a>00503 <span class="keywordflow">break</span>; <a name="l00504"></a>00504 } <a name="l00505"></a>00505 <span class="comment">// If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.</span> <a name="l00506"></a>00506 <span class="keywordflow">if</span> (is_array($contentArr) && !$contentArr['title']) { <a name="l00507"></a>00507 $contentArr['title'] = str_replace(<span class="charliteral">'_'</span>,<span class="charliteral">' '</span>,basename($absFile)); <span class="comment">// Substituting "_" for " " because many filenames may have this instead of a space char.</span> <a name="l00508"></a>00508 } <a name="l00509"></a>00509 <a name="l00510"></a>00510 <span class="keywordflow">return</span> $contentArr; <a name="l00511"></a>00511 } <a name="l00512"></a>00512 <a name="l00521"></a><a class="code" href="classtx__indexed__search__extparse.html#c249a5a5534b0ffb8db0ef31d552c9c6">00521</a> function <a class="code" href="classtx__indexed__search__extparse.html#c249a5a5534b0ffb8db0ef31d552c9c6">fileContentParts</a>($ext,$absFile) { <a name="l00522"></a>00522 $cParts = array(0); <a name="l00523"></a>00523 <span class="keywordflow">switch</span> ($ext) { <a name="l00524"></a>00524 <span class="keywordflow">case</span> 'pdf': <a name="l00525"></a>00525 <span class="comment">// Getting pdf-info:</span> <a name="l00526"></a>00526 $cmd = $this->app['pdfinfo'].' <span class="stringliteral">"'.$absFile.'"</span>'; <a name="l00527"></a>00527 exec($cmd,$res); <a name="l00528"></a>00528 $pdfInfo = $this-><a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">splitPdfInfo</a>($res); <a name="l00529"></a>00529 <a name="l00530"></a>00530 <span class="keywordflow">if</span> (intval($pdfInfo['pages'])) { <a name="l00531"></a>00531 $cParts = array(); <a name="l00532"></a>00532 <a name="l00533"></a>00533 <span class="comment">// Calculate mode</span> <a name="l00534"></a>00534 <span class="keywordflow">if</span> ($this->pdf_mode>0) { <a name="l00535"></a>00535 $iter = ceil($pdfInfo['pages']/$this->pdf_mode); <a name="l00536"></a>00536 } <span class="keywordflow">else</span> { <a name="l00537"></a>00537 $iter = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>(abs($this->pdf_mode),1,$pdfInfo['pages']); <a name="l00538"></a>00538 } <a name="l00539"></a>00539 <a name="l00540"></a>00540 <span class="comment">// Traverse and create intervals.</span> <a name="l00541"></a>00541 <span class="keywordflow">for</span> ($a=0;$a<$iter;$a++) { <a name="l00542"></a>00542 $low = floor($a*($pdfInfo['pages']/$iter))+1; <a name="l00543"></a>00543 $high = floor(($a+1)*($pdfInfo['pages']/$iter)); <a name="l00544"></a>00544 $cParts[] = $low.<span class="charliteral">'-'</span>.$high; <a name="l00545"></a>00545 } <a name="l00546"></a>00546 } <a name="l00547"></a>00547 <span class="keywordflow">break</span>; <a name="l00548"></a>00548 } <a name="l00549"></a>00549 <span class="keywordflow">return</span> $cParts; <a name="l00550"></a>00550 } <a name="l00551"></a>00551 <a name="l00560"></a><a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">00560</a> function splitPdfInfo($pdfInfoArray) { <a name="l00561"></a>00561 $res = array(); <a name="l00562"></a>00562 <span class="keywordflow">if</span> (is_array($pdfInfoArray)) { <a name="l00563"></a>00563 foreach($pdfInfoArray as $line) { <a name="l00564"></a>00564 $parts = explode(<span class="charliteral">':'</span>,$line,2); <a name="l00565"></a>00565 <span class="keywordflow">if</span> (count($parts)>1 && trim($parts[0])) { <a name="l00566"></a>00566 $res[strtolower(trim($parts[0]))] = trim($parts[1]); <a name="l00567"></a>00567 } <a name="l00568"></a>00568 } <a name="l00569"></a>00569 } <a name="l00570"></a>00570 <span class="keywordflow">return</span> $res; <a name="l00571"></a>00571 } <a name="l00572"></a>00572 <a name="l00579"></a><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">00579</a> function removeEndJunk($string) { <a name="l00580"></a>00580 <span class="keywordflow">return</span> trim(ereg_replace(<span class="charliteral">'['</span>.chr(10).chr(12).']*$<span class="charliteral">','</span>',$string)); <a name="l00581"></a>00581 } <a name="l00582"></a>00582 <a name="l00583"></a>00583 <a name="l00584"></a>00584 <a name="l00585"></a>00585 <a name="l00586"></a>00586 <a name="l00587"></a>00587 <a name="l00588"></a>00588 <a name="l00589"></a>00589 <a name="l00590"></a>00590 <a name="l00591"></a>00591 <a name="l00592"></a>00592 <a name="l00593"></a>00593 <a name="l00594"></a>00594 <span class="comment">/************************</span> <a name="l00595"></a>00595 <span class="comment"> *</span> <a name="l00596"></a>00596 <span class="comment"> * Backend analyzer</span> <a name="l00597"></a>00597 <span class="comment"> *</span> <a name="l00598"></a>00598 <span class="comment"> ************************/</span> <a name="l00599"></a>00599 <a name="l00606"></a><a class="code" href="classtx__indexed__search__extparse.html#72c4e6ebab1832eeb7bcba1c2d59b585">00606</a> function getIcon($extension) { <a name="l00607"></a>00607 <span class="keywordflow">if</span> ($extension=='htm') $extension = 'html'; <a name="l00608"></a>00608 <span class="keywordflow">if</span> ($extension=='jpeg') $extension = 'jpg'; <a name="l00609"></a>00609 <span class="keywordflow">return</span> 'EXT:indexed_search/pi/res/'.$extension.'.gif'; <a name="l00610"></a>00610 } <a name="l00611"></a>00611 } <a name="l00612"></a>00612 <a name="l00613"></a>00613 <span class="keywordflow">if</span> (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.external_parser.php']) { <a name="l00614"></a>00614 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.external_parser.php']); <a name="l00615"></a>00615 } <a name="l00616"></a>00616 ?> </pre></div><?php include_once '../doc-typo3-funcs.php'; get_footer(); ?>