<?php
  // Page bootstrap for this Doxygen-generated source listing.
  //
  // require_once (instead of include_once) makes a missing or unreadable
  // helper file stop the page right here with a clear error, rather than
  // emitting a warning and then failing on the get_header() call below.
  require_once '../doc-typo3-funcs.php';

  // Metadata describing this generated page; handed to get_header() as-is.
  $doxygen_vars = array(
    "title" => "TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/class.external_parser.php Source File",
    "datetime" => "Sat Dec 2 19:22:32 2006",
    "date" => "2 Dec 2006",
    "doxygenversion" => "1.4.6",
    "projectname" => "TYPO3 4.0.1",
    "projectnumber" => "4.0.1"
  );

  // get_header() is not defined in this file; it is expected to come from
  // doc-typo3-funcs.php (presumably it emits the page header -- confirm there).
  get_header($doxygen_vars);
?>
<!-- Generated by Doxygen 1.4.6 -->
<div class="tabs">
  <ul>
    <li><a href="main.html"><span>Main&nbsp;Page</span></a></li>
    <li><a href="namespaces.html"><span>Namespaces</span></a></li>
    <li><a href="classes.html"><span>Classes</span></a></li>
    <li id="current"><a href="files.html"><span>Files</span></a></li>
    <li><a href="dirs.html"><span>Directories</span></a></li>
    <li><a href="pages.html"><span>Related&nbsp;Pages</span></a></li>
    <li><a href="examples.html"><span>Examples</span></a></li>
    <li>
      <form action="search.php" method="get">
        <table cellspacing="0" cellpadding="0" border="0">
          <tr>
            <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
            <td><input type="text" name="query" value="" size="20" accesskey="s"/></td>
          </tr>
        </table>
      </form>
    </li>
  </ul></div>
<div class="nav">
<a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_18071ae4545d8b3e0364d30c0659c74a.html">typo3</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_57bf1ed8249c1fd5b014486d01bcb27a.html">sysext</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_1144f7dd65e866e7cd4aa66020137172.html">indexed_search</a></div>
<h1>class.external_parser.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 &lt;?php
<a name="l00002"></a>00002 <span class="comment">/***************************************************************</span>
<a name="l00003"></a>00003 <span class="comment">*  Copyright notice</span>
<a name="l00004"></a>00004 <span class="comment">*</span>
<a name="l00005"></a>00005 <span class="comment">*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)</span>
<a name="l00006"></a>00006 <span class="comment">*  All rights reserved</span>
<a name="l00007"></a>00007 <span class="comment">*</span>
<a name="l00008"></a>00008 <span class="comment">*  This script is part of the TYPO3 project. The TYPO3 project is</span>
<a name="l00009"></a>00009 <span class="comment">*  free software; you can redistribute it and/or modify</span>
<a name="l00010"></a>00010 <span class="comment">*  it under the terms of the GNU General Public License as published by</span>
<a name="l00011"></a>00011 <span class="comment">*  the Free Software Foundation; either version 2 of the License, or</span>
<a name="l00012"></a>00012 <span class="comment">*  (at your option) any later version.</span>
<a name="l00013"></a>00013 <span class="comment">*</span>
<a name="l00014"></a>00014 <span class="comment">*  The GNU General Public License can be found at</span>
<a name="l00015"></a>00015 <span class="comment">*  http://www.gnu.org/copyleft/gpl.html.</span>
<a name="l00016"></a>00016 <span class="comment">*  A copy is found in the textfile GPL.txt and important notices to the license</span>
<a name="l00017"></a>00017 <span class="comment">*  from the author is found in LICENSE.txt distributed with these scripts.</span>
<a name="l00018"></a>00018 <span class="comment">*</span>
<a name="l00019"></a>00019 <span class="comment">*</span>
<a name="l00020"></a>00020 <span class="comment">*  This script is distributed in the hope that it will be useful,</span>
<a name="l00021"></a>00021 <span class="comment">*  but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<a name="l00022"></a>00022 <span class="comment">*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</span>
<a name="l00023"></a>00023 <span class="comment">*  GNU General Public License for more details.</span>
<a name="l00024"></a>00024 <span class="comment">*</span>
<a name="l00025"></a>00025 <span class="comment">*  This copyright notice MUST APPEAR in all copies of the script!</span>
<a name="l00026"></a>00026 <span class="comment">***************************************************************/</span>
<a name="l00075"></a><a class="code" href="classtx__indexed__search__extparse.html">00075</a> <span class="keyword">class </span><a class="code" href="classtx__indexed__search__extparse.html">tx_indexed_search_extparse</a> {
<a name="l00076"></a>00076 
<a name="l00077"></a>00077                 <span class="comment">// This value is also overridden from config.</span>
<a name="l00078"></a><a class="code" href="classtx__indexed__search__extparse.html#483c9c6b542bda541591959eff79aa7f">00078</a>         var <a class="code" href="classtx__indexed__search__extparse.html#483c9c6b542bda541591959eff79aa7f">$pdf_mode</a> = -20;    <span class="comment">// zero: whole PDF file is indexed in one. positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,.... Negative integer would indicate (abs value) number of groups. Eg "3" groups of 10 pages would be 1-4,5-8,9-10</span>
<a name="l00079"></a>00079 
<a name="l00080"></a>00080                 <span class="comment">// This array is configured in initialization:</span>
<a name="l00081"></a><a class="code" href="classtx__indexed__search__extparse.html#6ccbe78d9365a6ee9f26bb912588f804">00081</a>         var <a class="code" href="classtx__indexed__search__extparse.html#6ccbe78d9365a6ee9f26bb912588f804">$app</a> = array();
<a name="l00082"></a><a class="code" href="classtx__indexed__search__extparse.html#9f8cfdeeb28969198a9c77cf9b6737db">00082</a>         var <a class="code" href="classtx__indexed__search__extparse.html#9f8cfdeeb28969198a9c77cf9b6737db">$ext2itemtype_map</a> = array();
<a name="l00083"></a><a class="code" href="classtx__indexed__search__extparse.html#c78b29692431ee5370e92344a5a0ca2d">00083</a>         var <a class="code" href="classtx__indexed__search__extparse.html#c78b29692431ee5370e92344a5a0ca2d">$supportedExtensions</a> = array();
<a name="l00084"></a>00084 
<a name="l00085"></a><a class="code" href="classtx__indexed__search__extparse.html#a6d3172574bef5eefdba59078293f546">00085</a>         var <a class="code" href="classtx__indexed__search__extparse.html#a6d3172574bef5eefdba59078293f546">$pObj</a>;              <span class="comment">// Reference to parent object (indexer class)</span>
<a name="l00086"></a>00086 
<a name="l00087"></a>00087 
<a name="l00094"></a><a class="code" href="classtx__indexed__search__extparse.html#03b11751d3b7c7932c9636099380e22f">00094</a>         function <a class="code" href="classtx__indexed__search__extparse.html#03b11751d3b7c7932c9636099380e22f">initParser</a>($extension) {
<a name="l00095"></a>00095 
<a name="l00096"></a>00096                         <span class="comment">// Then read indexer-config and set if appropriate:</span>
<a name="l00097"></a>00097                 $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
<a name="l00098"></a>00098 
<a name="l00099"></a>00099                         <span class="comment">// If windows, apply extension to tool name:</span>
<a name="l00100"></a>00100                 $exe = (TYPO3_OS == 'WIN') ? '.exe' : ''; <span class="comment">// lg</span>
<a name="l00101"></a>00101                 $extOK = FALSE;
<a name="l00102"></a>00102                 $mainExtension = '';
<a name="l00103"></a>00103 
<a name="l00104"></a>00104                         <span class="comment">// Ignore extensions</span>
<a name="l00105"></a>00105                 $ignoreExtensions = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">','</span>, strtolower($indexerConfig['ignoreExtensions']),1);
<a name="l00106"></a>00106                 <span class="keywordflow">if</span> (in_array($extension, $ignoreExtensions))    {
<a name="l00107"></a>00107                         $this-&gt;pObj-&gt;log_setTSlogMessage('Extension <span class="stringliteral">"'.$extension.'"</span> was set to be ignored.',1);
<a name="l00108"></a>00108                         <span class="keywordflow">return</span> FALSE;
<a name="l00109"></a>00109                 }
<a name="l00110"></a>00110 
<a name="l00111"></a>00111                         <span class="comment">// Switch on file extension:</span>
<a name="l00112"></a>00112                 <span class="keywordflow">switch</span>($extension)      {
<a name="l00113"></a>00113                         <span class="keywordflow">case</span> 'pdf':
<a name="l00114"></a>00114                                         <span class="comment">// PDF</span>
<a name="l00115"></a>00115                                 <span class="keywordflow">if</span> ($indexerConfig['pdftools']) {
<a name="l00116"></a>00116                                         $pdfPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['pdftools']).<span class="charliteral">'/'</span>;
<a name="l00117"></a>00117                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || (@is_file($pdfPath.'pdftotext'.$exe) &amp;&amp; @is_file($pdfPath.'pdfinfo'.$exe))) {
<a name="l00118"></a>00118                                                 $this-&gt;app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
<a name="l00119"></a>00119                                                 $this-&gt;app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
<a name="l00120"></a>00120                                                         <span class="comment">// PDF mode:</span>
<a name="l00121"></a>00121                                                 $this-&gt;pdf_mode = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($indexerConfig['pdf_mode'],-100,100);
<a name="l00122"></a>00122                                                 $extOK = TRUE;
<a name="l00123"></a>00123                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"PDF tools was not found in paths '"</span>.$pdfPath.<span class="stringliteral">"pdftotext' and/or '"</span>.$pdfPath.<span class="stringliteral">"pdfinfo'"</span>,3);
<a name="l00124"></a>00124                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('PDF tools disabled',1);
<a name="l00125"></a>00125                         <span class="keywordflow">break</span>;
<a name="l00126"></a>00126                         <span class="keywordflow">case</span> 'doc':
<a name="l00127"></a>00127                                         <span class="comment">// Catdoc</span>
<a name="l00128"></a>00128                                 <span class="keywordflow">if</span> ($indexerConfig['catdoc'])   {
<a name="l00129"></a>00129                                         $catdocPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['catdoc']).<span class="charliteral">'/'</span>;
<a name="l00130"></a>00130                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($catdocPath.'catdoc'.$exe))        {
<a name="l00131"></a>00131                                                 $this-&gt;app['catdoc'] = $catdocPath.'catdoc'.$exe;
<a name="l00132"></a>00132                                                 $extOK = TRUE;
<a name="l00133"></a>00133                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"'catdoc' tool for reading Word-files was not found in path '"</span>.$catdocPath.<span class="stringliteral">"catdoc'"</span>,3);
<a name="l00134"></a>00134                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('catdoc tools (Word-files) disabled',1);
<a name="l00135"></a>00135                         <span class="keywordflow">break</span>;
<a name="l00136"></a>00136                         <span class="keywordflow">case</span> 'pps':             <span class="comment">// MS PowerPoint(?)</span>
<a name="l00137"></a>00137                         <span class="keywordflow">case</span> 'ppt':             <span class="comment">// MS PowerPoint</span>
<a name="l00138"></a>00138                                         <span class="comment">// ppthtml</span>
<a name="l00139"></a>00139                                 <span class="keywordflow">if</span> ($indexerConfig['ppthtml'])  {
<a name="l00140"></a>00140                                         $ppthtmlPath = ereg_replace('\/$<span class="charliteral">','</span>',$indexerConfig['ppthtml']).<span class="charliteral">'/'</span>;
<a name="l00141"></a>00141                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($ppthtmlPath.'ppthtml'.$exe)){
<a name="l00142"></a>00142                                                 $this-&gt;app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
<a name="l00143"></a>00143                                                 $extOK = TRUE;
<a name="l00144"></a>00144                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"'ppthtml' tool for reading Powerpoint-files was not found in path '"</span>.$ppthtmlPath.<span class="stringliteral">"ppthtml'"</span>,3);
<a name="l00145"></a>00145                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('ppthtml tools (Powerpoint-files) disabled',1);
<a name="l00146"></a>00146                         <span class="keywordflow">break</span>;
<a name="l00147"></a>00147                         <span class="keywordflow">case</span> 'xls':             <span class="comment">// MS Excel</span>
<a name="l00148"></a>00148                                         <span class="comment">// Xlhtml</span>
<a name="l00149"></a>00149                                 <span class="keywordflow">if</span> ($indexerConfig['xlhtml'])   {
<a name="l00150"></a>00150                                         $xlhtmlPath = ereg_replace('\/$<span class="charliteral">','</span>',$indexerConfig['xlhtml']).<span class="charliteral">'/'</span>;
<a name="l00151"></a>00151                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($xlhtmlPath.'xlhtml'.$exe)){
<a name="l00152"></a>00152                                                 $this-&gt;app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
<a name="l00153"></a>00153                                                 $extOK = TRUE;
<a name="l00154"></a>00154                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"'xlhtml' tool for reading Excel-files was not found in path '"</span>.$xlhtmlPath.<span class="stringliteral">"xlhtml'"</span>,3);
<a name="l00155"></a>00155                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('xlhtml tools (Excel-files) disabled',1);
<a name="l00156"></a>00156                         <span class="keywordflow">break</span>;
<a name="l00157"></a>00157                         <span class="keywordflow">case</span> 'sxc':             <span class="comment">// Open Office Calc.</span>
<a name="l00158"></a>00158                         <span class="keywordflow">case</span> 'sxi':             <span class="comment">// Open Office Impress</span>
<a name="l00159"></a>00159                         <span class="keywordflow">case</span> 'sxw':             <span class="comment">// Open Office Writer</span>
<a name="l00160"></a>00160                         <span class="keywordflow">case</span> 'ods':             <span class="comment">// Oasis OpenDocument Spreadsheet</span>
<a name="l00161"></a>00161                         <span class="keywordflow">case</span> 'odp':             <span class="comment">// Oasis OpenDocument Presentation</span>
<a name="l00162"></a>00162                         <span class="keywordflow">case</span> 'odt':             <span class="comment">// Oasis OpenDocument Text</span>
<a name="l00163"></a>00163                                 <span class="keywordflow">if</span> ($indexerConfig['unzip'])    {
<a name="l00164"></a>00164                                         $unzipPath = preg_replace('/\/$/<span class="charliteral">','</span>',$indexerConfig['unzip']).<span class="charliteral">'/'</span>;
<a name="l00165"></a>00165                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($unzipPath.'unzip'.$exe))  {
<a name="l00166"></a>00166                                                 $this-&gt;app['unzip'] = $unzipPath.'unzip'.$exe;
<a name="l00167"></a>00167                                                 $extOK = TRUE;
<a name="l00168"></a>00168                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"'unzip' tool for reading OpenOffice.org-files was not found in path '"</span>.$unzipPath.<span class="stringliteral">"unzip'"</span>,3);
<a name="l00169"></a>00169                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('unzip tool (OpenOffice.org-files) disabled',1);
<a name="l00170"></a>00170                         <span class="keywordflow">break</span>;
<a name="l00171"></a>00171                         <span class="keywordflow">case</span> 'rtf':
<a name="l00172"></a>00172                                         <span class="comment">// Catdoc</span>
<a name="l00173"></a>00173                                 <span class="keywordflow">if</span> ($indexerConfig['unrtf'])    {
<a name="l00174"></a>00174                                         $unrtfPath = ereg_replace(<span class="stringliteral">"\/$"</span>,'',$indexerConfig['unrtf']).<span class="charliteral">'/'</span>;
<a name="l00175"></a>00175                                         <span class="keywordflow">if</span> (ini_get('safe_mode') || @is_file($unrtfPath.'unrtf'.$exe))  {
<a name="l00176"></a>00176                                                 $this-&gt;app['unrtf'] = $unrtfPath.'unrtf'.$exe;
<a name="l00177"></a>00177                                                 $extOK = TRUE;
<a name="l00178"></a>00178                                         } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage(<span class="stringliteral">"'unrtf' tool for reading RTF-files was not found in path '"</span>.$unrtfPath.<span class="stringliteral">"unrtf'"</span>,3);
<a name="l00179"></a>00179                                 } <span class="keywordflow">else</span> $this-&gt;pObj-&gt;log_setTSlogMessage('unrtf tool (RTF-files) disabled',1);
<a name="l00180"></a>00180                         <span class="keywordflow">break</span>;
<a name="l00181"></a>00181                         <span class="keywordflow">case</span> 'txt':             <span class="comment">// Raw text</span>
<a name="l00182"></a>00182                         <span class="keywordflow">case</span> 'csv':             <span class="comment">// Raw text</span>
<a name="l00183"></a>00183                         <span class="keywordflow">case</span> 'xml':             <span class="comment">// PHP strip-tags()</span>
<a name="l00184"></a>00184                         <span class="keywordflow">case</span> 'tif':             <span class="comment">// PHP EXIF</span>
<a name="l00185"></a>00185                                 $extOK = TRUE;
<a name="l00186"></a>00186                         <span class="keywordflow">break</span>;
<a name="l00187"></a>00187                         <span class="keywordflow">case</span> 'html':    <span class="comment">// PHP strip-tags()</span>
<a name="l00188"></a>00188                         <span class="keywordflow">case</span> 'htm':             <span class="comment">// PHP strip-tags()</span>
<a name="l00189"></a>00189                                 $extOK = TRUE;
<a name="l00190"></a>00190                                 $mainExtension = 'html';        <span class="comment">// making "html" the common "item_type"</span>
<a name="l00191"></a>00191                         <span class="keywordflow">break</span>;
<a name="l00192"></a>00192                         <span class="keywordflow">case</span> 'jpg':             <span class="comment">// PHP EXIF</span>
<a name="l00193"></a>00193                         <span class="keywordflow">case</span> 'jpeg':    <span class="comment">// PHP EXIF</span>
<a name="l00194"></a>00194                                 $extOK = TRUE;
<a name="l00195"></a>00195                                 $mainExtension = 'jpeg';        <span class="comment">// making "jpeg" the common item_type</span>
<a name="l00196"></a>00196                         <span class="keywordflow">break</span>;
<a name="l00197"></a>00197                 }
<a name="l00198"></a>00198 
<a name="l00199"></a>00199                         <span class="comment">// If extension was OK:</span>
<a name="l00200"></a>00200                 <span class="keywordflow">if</span> ($extOK)     {
<a name="l00201"></a>00201                         $this-&gt;supportedExtensions[$extension] = TRUE;
<a name="l00202"></a>00202                         $this-&gt;ext2itemtype_map[$extension] = $mainExtension ? $mainExtension : $extension;
<a name="l00203"></a>00203                         <span class="keywordflow">return</span> TRUE;
<a name="l00204"></a>00204                 }
<a name="l00205"></a>00205         }
<a name="l00206"></a>00206 
<a name="l00214"></a><a class="code" href="classtx__indexed__search__extparse.html#6c7999dfdc13d3188d63085fb7c6c4ea">00214</a>         function <a class="code" href="classtx__indexed__search__extparse.html#6c7999dfdc13d3188d63085fb7c6c4ea">softInit</a>($extension)   {
<a name="l00215"></a>00215                 <span class="keywordflow">switch</span>($extension)      {
<a name="l00216"></a>00216                         <span class="keywordflow">case</span> 'pdf':             <span class="comment">// PDF</span>
<a name="l00217"></a>00217                         <span class="keywordflow">case</span> 'doc':             <span class="comment">// MS Word files</span>
<a name="l00218"></a>00218                         <span class="keywordflow">case</span> 'pps':             <span class="comment">// MS PowerPoint</span>
<a name="l00219"></a>00219                         <span class="keywordflow">case</span> 'ppt':             <span class="comment">// MS PowerPoint</span>
<a name="l00220"></a>00220                         <span class="keywordflow">case</span> 'xls':             <span class="comment">// MS Excel</span>
<a name="l00221"></a>00221                         <span class="keywordflow">case</span> 'sxc':             <span class="comment">// Open Office Calc.</span>
<a name="l00222"></a>00222                         <span class="keywordflow">case</span> 'sxi':             <span class="comment">// Open Office Impress</span>
<a name="l00223"></a>00223                         <span class="keywordflow">case</span> 'sxw':             <span class="comment">// Open Office Writer</span>
<a name="l00224"></a>00224                         <span class="keywordflow">case</span> 'ods':             <span class="comment">// Oasis OpenDocument Spreadsheet</span>
<a name="l00225"></a>00225                         <span class="keywordflow">case</span> 'odp':             <span class="comment">// Oasis OpenDocument Presentation</span>
<a name="l00226"></a>00226                         <span class="keywordflow">case</span> 'odt':             <span class="comment">// Oasis OpenDocument Text</span>
<a name="l00227"></a>00227                         <span class="keywordflow">case</span> 'rtf':             <span class="comment">// RTF documents</span>
<a name="l00228"></a>00228                         <span class="keywordflow">case</span> 'txt':             <span class="comment">// ASCII Text documents</span>
<a name="l00229"></a>00229                         <span class="keywordflow">case</span> 'html':    <span class="comment">// HTML</span>
<a name="l00230"></a>00230                         <span class="keywordflow">case</span> 'htm':             <span class="comment">// HTML</span>
<a name="l00231"></a>00231                         <span class="keywordflow">case</span> 'csv':             <span class="comment">// Comma Separated Values</span>
<a name="l00232"></a>00232                         <span class="keywordflow">case</span> 'xml':             <span class="comment">// Generic XML</span>
<a name="l00233"></a>00233                         <span class="keywordflow">case</span> 'jpg':             <span class="comment">// Jpeg images (EXIF comment)</span>
<a name="l00234"></a>00234                         <span class="keywordflow">case</span> 'jpeg':    <span class="comment">// Jpeg images (EXIF comment)</span>
<a name="l00235"></a>00235                         <span class="keywordflow">case</span> 'tif':             <span class="comment">// TIF images (EXIF comment)</span>
<a name="l00236"></a>00236                                 <span class="keywordflow">return</span> TRUE;
<a name="l00237"></a>00237                         <span class="keywordflow">break</span>;
<a name="l00238"></a>00238                 }
<a name="l00239"></a>00239         }
<a name="l00240"></a>00240 
<a name="l00247"></a><a class="code" href="classtx__indexed__search__extparse.html#b6e93a955bf2f737529d1e615d327712">00247</a>         function <a class="code" href="classtx__indexed__search__extparse.html#b6e93a955bf2f737529d1e615d327712">searchTypeMediaTitle</a>($extension)       {
<a name="l00248"></a>00248 
<a name="l00249"></a>00249                         <span class="comment">// Read indexer-config</span>
<a name="l00250"></a>00250                 $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
<a name="l00251"></a>00251 
<a name="l00252"></a>00252                         <span class="comment">// Ignore extensions</span>
<a name="l00253"></a>00253                 $ignoreExtensions = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(<span class="charliteral">','</span>, strtolower($indexerConfig['ignoreExtensions']),1);
<a name="l00254"></a>00254                 <span class="keywordflow">if</span> (in_array($extension, $ignoreExtensions))    {
<a name="l00255"></a>00255                         <span class="keywordflow">return</span> FALSE;
<a name="l00256"></a>00256                 }
<a name="l00257"></a>00257 
<a name="l00258"></a>00258                         <span class="comment">// Switch on file extension:</span>
<a name="l00259"></a>00259                 <span class="keywordflow">switch</span>($extension)      {
<a name="l00260"></a>00260                         <span class="keywordflow">case</span> 'pdf':
<a name="l00261"></a>00261                                         <span class="comment">// PDF</span>
<a name="l00262"></a>00262                                 <span class="keywordflow">if</span> ($indexerConfig['pdftools']) {
<a name="l00263"></a>00263                                         <span class="keywordflow">return</span> 'PDF';
<a name="l00264"></a>00264                                 }
<a name="l00265"></a>00265                         <span class="keywordflow">break</span>;
<a name="l00266"></a>00266                         <span class="keywordflow">case</span> 'doc':
<a name="l00267"></a>00267                                         <span class="comment">// Catdoc</span>
<a name="l00268"></a>00268                                 <span class="keywordflow">if</span> ($indexerConfig['catdoc'])   {
<a name="l00269"></a>00269                                         <span class="keywordflow">return</span> 'MS Word';
<a name="l00270"></a>00270                                 }
<a name="l00271"></a>00271                         <span class="keywordflow">break</span>;
<a name="l00272"></a>00272                         <span class="keywordflow">case</span> 'pps':             <span class="comment">// MS PowerPoint(?)</span>
<a name="l00273"></a>00273                         <span class="keywordflow">case</span> 'ppt':             <span class="comment">// MS PowerPoint</span>
<a name="l00274"></a>00274                                         <span class="comment">// ppthtml</span>
<a name="l00275"></a>00275                                 <span class="keywordflow">if</span> ($indexerConfig['ppthtml'])  {
<a name="l00276"></a>00276                                         <span class="keywordflow">return</span> 'MS Powerpoint';
<a name="l00277"></a>00277                                 }
<a name="l00278"></a>00278                         <span class="keywordflow">break</span>;
<a name="l00279"></a>00279                         <span class="keywordflow">case</span> 'xls':             <span class="comment">// MS Excel</span>
<a name="l00280"></a>00280                                         <span class="comment">// Xlhtml</span>
<a name="l00281"></a>00281                                 <span class="keywordflow">if</span> ($indexerConfig['xlhtml'])   {
<a name="l00282"></a>00282                                         <span class="keywordflow">return</span> 'MS Excel';
<a name="l00283"></a>00283                                 }
<a name="l00284"></a>00284                         <span class="keywordflow">break</span>;
<a name="l00285"></a>00285                         <span class="keywordflow">case</span> 'sxc':             <span class="comment">// Open Office Calc.</span>
<a name="l00286"></a>00286                         <span class="keywordflow">case</span> 'sxi':             <span class="comment">// Open Office Impress</span>
<a name="l00287"></a>00287                         <span class="keywordflow">case</span> 'sxw':             <span class="comment">// Open Office Writer</span>
<a name="l00288"></a>00288                         <span class="keywordflow">case</span> 'ods':             <span class="comment">// Oasis OpenDocument Spreadsheet</span>
<a name="l00289"></a>00289                         <span class="keywordflow">case</span> 'odp':             <span class="comment">// Oasis OpenDocument Presentation</span>
<a name="l00290"></a>00290                         <span class="keywordflow">case</span> 'odt':             <span class="comment">// Oasis OpenDocument Text</span>
<a name="l00291"></a>00291                                 <span class="keywordflow">if</span> ($indexerConfig['unzip'])    {
<a name="l00292"></a>00292                                         <span class="keywordflow">return</span> 'Open Office';
<a name="l00293"></a>00293                                 }
<a name="l00294"></a>00294                         <span class="keywordflow">break</span>;
<a name="l00295"></a>00295                         <span class="keywordflow">case</span> 'rtf':
<a name="l00296"></a>00296                                         <span class="comment">// Catdoc</span>
<a name="l00297"></a>00297                                 <span class="keywordflow">if</span> ($indexerConfig['unrtf'])    {
<a name="l00298"></a>00298                                         <span class="keywordflow">return</span> 'RTF';
<a name="l00299"></a>00299                                 }
<a name="l00300"></a>00300                         <span class="keywordflow">break</span>;
<a name="l00301"></a>00301                         <span class="keywordflow">case</span> 'html':    <span class="comment">// PHP strip-tags()</span>
<a name="l00302"></a>00302                         <span class="keywordflow">case</span> 'jpeg':    <span class="comment">// PHP EXIF</span>
<a name="l00303"></a>00303                         <span class="keywordflow">case</span> 'txt':             <span class="comment">// Raw text</span>
<a name="l00304"></a>00304                         <span class="keywordflow">case</span> 'csv':             <span class="comment">// Raw text</span>
<a name="l00305"></a>00305                         <span class="keywordflow">case</span> 'xml':             <span class="comment">// PHP strip-tags()</span>
<a name="l00306"></a>00306                         <span class="keywordflow">case</span> 'tif':             <span class="comment">// PHP EXIF</span>
<a name="l00307"></a>00307                                 <span class="keywordflow">return</span> strtoupper($extension);
<a name="l00308"></a>00308                         <span class="keywordflow">break</span>;
<a name="l00309"></a>00309                                 <span class="comment">// NO entry (duplicates or blank):</span>
<a name="l00310"></a>00310                         <span class="keywordflow">case</span> 'htm':             <span class="comment">// PHP strip-tags()</span>
<a name="l00311"></a>00311                         <span class="keywordflow">case</span> 'jpg':             <span class="comment">// PHP EXIF</span>
<a name="l00312"></a>00312                         <span class="keywordflow">default</span>:
<a name="l00313"></a>00313                         <span class="keywordflow">break</span>;
<a name="l00314"></a>00314                 }
<a name="l00315"></a>00315         }
<a name="l00316"></a>00316 
<a name="l00323"></a><a class="code" href="classtx__indexed__search__extparse.html#423bc2a451130a37ac96b89f7eae7605">00323</a>         function <a class="code" href="classtx__indexed__search__extparse.html#423bc2a451130a37ac96b89f7eae7605">isMultiplePageExtension</a>($extension)    {
<a name="l00324"></a>00324                         <span class="comment">// Switch on file extension:</span>
<a name="l00325"></a>00325                 <span class="keywordflow">switch</span>((string)$extension)      {
<a name="l00326"></a>00326                         <span class="keywordflow">case</span> 'pdf':
<a name="l00327"></a>00327                                 <span class="keywordflow">return</span> TRUE;
<a name="l00328"></a>00328                         <span class="keywordflow">break</span>;
<a name="l00329"></a>00329                 }
<a name="l00330"></a>00330         }
<a name="l00331"></a>00331 
<a name="l00332"></a>00332 
<a name="l00333"></a>00333 
<a name="l00334"></a>00334 
<a name="l00335"></a>00335 
<a name="l00336"></a>00336 
<a name="l00337"></a>00337 
<a name="l00338"></a>00338 
<a name="l00339"></a>00339 
<a name="l00340"></a>00340         <span class="comment">/************************</span>
<a name="l00341"></a>00341 <span class="comment">         *</span>
<a name="l00342"></a>00342 <span class="comment">         * Reading documents (for parsing)</span>
<a name="l00343"></a>00343 <span class="comment">         *</span>
<a name="l00344"></a>00344 <span class="comment">         ************************/</span>
<a name="l00345"></a>00345 
<a name="l00354"></a><a class="code" href="classtx__indexed__search__extparse.html#ecd64ada0ac29b1ae5079df31cddaaa1">00354</a>         function <a class="code" href="classtx__indexed__search__extparse.html#ecd64ada0ac29b1ae5079df31cddaaa1">readFileContent</a>($ext,$absFile,$cPKey)  {
<a name="l00355"></a>00355                 unset($contentArr);
<a name="l00356"></a>00356 
<a name="l00357"></a>00357                         <span class="comment">// Return immediately if initialization didn't set support up:</span>
<a name="l00358"></a>00358                 <span class="keywordflow">if</span> (!$this-&gt;supportedExtensions[$ext])  <span class="keywordflow">return</span> FALSE;
<a name="l00359"></a>00359 
<a name="l00360"></a>00360                         <span class="comment">// Switch by file extension</span>
<a name="l00361"></a>00361                 <span class="keywordflow">switch</span> ($ext)   {
<a name="l00362"></a>00362                         <span class="keywordflow">case</span> 'pdf':
<a name="l00363"></a>00363                                 <span class="keywordflow">if</span> ($this-&gt;app['pdfinfo'])      {
<a name="l00364"></a>00364                                                 <span class="comment">// Getting pdf-info:</span>
<a name="l00365"></a>00365                                         $cmd = $this-&gt;app['pdfinfo'].' <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00366"></a>00366                                         exec($cmd,$res);
<a name="l00367"></a>00367                                         $pdfInfo = $this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">splitPdfInfo</a>($res);
<a name="l00368"></a>00368                                         <span class="keywordflow">if</span> (intval($pdfInfo['pages']))  {
<a name="l00369"></a>00369                                                 list($low,$high) = explode(<span class="charliteral">'-'</span>,$cPKey);
<a name="l00370"></a>00370 
<a name="l00371"></a>00371                                                         <span class="comment">// Get pdf content:</span>
<a name="l00372"></a>00372                                                 $tempFileName = <a class="code" href="classt3lib__div.html#e126c1b5d0f72003e39c2930d5f65f07">t3lib_div::tempnam</a>('Typo3_indexer');            <span class="comment">// Create temporary name</span>
<a name="l00373"></a>00373                                                 @unlink ($tempFileName);        <span class="comment">// Delete if exists, just to be safe.</span>
<a name="l00374"></a>00374                                                 $cmd = $this-&gt;app['pdftotext'].' -f '.$low.' -l '.$high.' -enc UTF-8 -q <span class="stringliteral">"'.$absFile.'"</span> '.$tempFileName;
<a name="l00375"></a>00375                                                 exec($cmd,$res);
<a name="l00376"></a>00376                                                 <span class="keywordflow">if</span> (@is_file($tempFileName))    {
<a name="l00377"></a>00377                                                         $content = t3lib_div::getUrl($tempFileName);
<a name="l00378"></a>00378                                                         unlink($tempFileName);
<a name="l00379"></a>00379                                                 } <span class="keywordflow">else</span> {
<a name="l00380"></a>00380                                                         $this-&gt;pObj-&gt;log_setTSlogMessage('PDFtoText Failed on <span class="keyword">this</span> document: '.$absFile.<span class="stringliteral">". Maybe the PDF file is locked for printing or encrypted."</span>,2);
<a name="l00381"></a>00381                                                 }
<a name="l00382"></a>00382                                                 <span class="keywordflow">if</span> (strlen($content))   {
<a name="l00383"></a>00383                                                         $contentArr = $this-&gt;pObj-&gt;splitRegularContent($this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content));
<a name="l00384"></a>00384                                                 }
<a name="l00385"></a>00385                                         }
<a name="l00386"></a>00386                                 }
<a name="l00387"></a>00387                         <span class="keywordflow">break</span>;
<a name="l00388"></a>00388                         <span class="keywordflow">case</span> 'doc':
<a name="l00389"></a>00389                                 <span class="keywordflow">if</span> ($this-&gt;app['catdoc'])       {
<a name="l00390"></a>00390                                         $cmd = $this-&gt;app['catdoc'].' -d utf-8 <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00391"></a>00391                                         exec($cmd,$res);
<a name="l00392"></a>00392                                         $content = implode(chr(10),$res);
<a name="l00393"></a>00393                                         $contentArr = $this-&gt;pObj-&gt;splitRegularContent($this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content));
<a name="l00394"></a>00394                                 }
<a name="l00395"></a>00395                         <span class="keywordflow">break</span>;
<a name="l00396"></a>00396                         <span class="keywordflow">case</span> 'pps':
<a name="l00397"></a>00397                         <span class="keywordflow">case</span> 'ppt':
<a name="l00398"></a>00398                                 <span class="keywordflow">if</span> ($this-&gt;app['ppthtml'])      {
<a name="l00399"></a>00399                                         $cmd = $this-&gt;app['ppthtml'].' <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00400"></a>00400                                         exec($cmd,$res);
<a name="l00401"></a>00401                                         $content = implode(chr(10),$res);
<a name="l00402"></a>00402                                         $content = $this-&gt;pObj-&gt;convertHTMLToUtf8($content);
<a name="l00403"></a>00403                                         $contentArr = $this-&gt;pObj-&gt;splitHTMLContent($this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content));
<a name="l00404"></a>00404                                         $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00405"></a>00405                                 }
<a name="l00406"></a>00406                         <span class="keywordflow">break</span>;
<a name="l00407"></a>00407                         <span class="keywordflow">case</span> 'xls':
<a name="l00408"></a>00408                                 <span class="keywordflow">if</span> ($this-&gt;app['xlhtml'])       {
<a name="l00409"></a>00409                                         $cmd = $this-&gt;app['xlhtml'].' -nc -te <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00410"></a>00410                                         exec($cmd,$res);
<a name="l00411"></a>00411                                         $content = implode(chr(10),$res);
<a name="l00412"></a>00412                                         $content = $this-&gt;pObj-&gt;convertHTMLToUtf8($content);
<a name="l00413"></a>00413                                         $contentArr = $this-&gt;pObj-&gt;splitHTMLContent($this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">removeEndJunk</a>($content));
<a name="l00414"></a>00414                                         $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00415"></a>00415                                 }
<a name="l00416"></a>00416                         <span class="keywordflow">break</span>;
<a name="l00417"></a>00417                         <span class="keywordflow">case</span> 'sxi':
<a name="l00418"></a>00418                         <span class="keywordflow">case</span> 'sxc':
<a name="l00419"></a>00419                         <span class="keywordflow">case</span> 'sxw':
<a name="l00420"></a>00420                         <span class="keywordflow">case</span> 'ods':
<a name="l00421"></a>00421                         <span class="keywordflow">case</span> 'odp':
<a name="l00422"></a>00422                         <span class="keywordflow">case</span> 'odt':
<a name="l00423"></a>00423                                 <span class="keywordflow">if</span> ($this-&gt;app['unzip'])        {
<a name="l00424"></a>00424                                                 <span class="comment">// Read content.xml:</span>
<a name="l00425"></a>00425                                         $cmd = $this-&gt;app['unzip'].' -p '.$absFile.' content.xml';
<a name="l00426"></a>00426                                         exec($cmd,$out);
<a name="l00427"></a>00427                                         $content_xml = implode(chr(10),$out);
<a name="l00428"></a>00428 
<a name="l00429"></a>00429                                                 <span class="comment">// Read meta.xml:</span>
<a name="l00430"></a>00430                                         $cmd = $this-&gt;app['unzip'].' -p '.$absFile.' meta.xml';
<a name="l00431"></a>00431                                         exec($cmd, $out);
<a name="l00432"></a>00432                                         $meta_xml = implode(chr(10),$out);
<a name="l00433"></a>00433 
<a name="l00434"></a>00434                                         $utf8_content = trim(strip_tags(str_replace(<span class="charliteral">'&lt;'</span>,' &lt;',$content_xml)));
<a name="l00435"></a>00435                                         $contentArr = $this-&gt;pObj-&gt;splitRegularContent($utf8_content);
<a name="l00436"></a>00436                                         $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00437"></a>00437 
<a name="l00438"></a>00438                                                 <span class="comment">// Meta information</span>
<a name="l00439"></a>00439                                         $metaContent = <a class="code" href="classt3lib__div.html#459ef7b829f164c32f9ec1b8a02f7774">t3lib_div::xml2tree</a>($meta_xml);
<a name="l00440"></a>00440                                         $metaContent = $metaContent['office:document-meta'][0]['ch']['office:meta'][0]['ch'];
<a name="l00441"></a>00441                                         <span class="keywordflow">if</span> (is_array($metaContent))     {
<a name="l00442"></a>00442                                                 $contentArr['title'] = $metaContent['dc:title'][0]['values'][0] ? $metaContent['dc:title'][0]['values'][0] : $contentArr['title'];
<a name="l00443"></a>00443                                                 $contentArr['description'] = $metaContent['dc:subject'][0]['values'][0].<span class="charliteral">' '</span>.$metaContent['dc:description'][0]['values'][0];
<a name="l00444"></a>00444 
<a name="l00445"></a>00445                                                         <span class="comment">// Keywords collected:</span>
<a name="l00446"></a>00446                                                 <span class="keywordflow">if</span> (is_array($metaContent['meta:keywords'][0]['ch']['meta:keyword']))   {
<a name="l00447"></a>00447                                                         foreach ($metaContent['meta:keywords'][0]['ch']['meta:keyword'] as $kwDat)      {
<a name="l00448"></a>00448                                                                 $contentArr['keywords'].= $kwDat['values'][0].<span class="charliteral">' '</span>;
<a name="l00449"></a>00449                                                         }
<a name="l00450"></a>00450                                                 }
<a name="l00451"></a>00451                                         }
<a name="l00452"></a>00452                                 }
<a name="l00453"></a>00453                         <span class="keywordflow">break</span>;
<a name="l00454"></a>00454                         <span class="keywordflow">case</span> 'rtf':
<a name="l00455"></a>00455                                 <span class="keywordflow">if</span> ($this-&gt;app['unrtf'])        {
<a name="l00456"></a>00456                                         $cmd = $this-&gt;app['unrtf'].' <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00457"></a>00457                                         exec($cmd,$res);
<a name="l00458"></a>00458                                         $fileContent = implode(chr(10),$res);
<a name="l00459"></a>00459                                         $fileContent = $this-&gt;pObj-&gt;convertHTMLToUtf8($fileContent);
<a name="l00460"></a>00460                                         $contentArr = $this-&gt;pObj-&gt;splitHTMLContent($fileContent);
<a name="l00461"></a>00461                                 }
<a name="l00462"></a>00462                         <span class="keywordflow">break</span>;
<a name="l00463"></a>00463                         <span class="keywordflow">case</span> 'txt':
<a name="l00464"></a>00464                         <span class="keywordflow">case</span> 'csv':             <span class="comment">// Raw text</span>
<a name="l00465"></a>00465                                 $content = t3lib_div::getUrl($absFile);
<a name="l00466"></a>00466                                         <span class="comment">// TODO: Auto-detect the charset and convert to utf-8 (currently assumes Western European, iso-8859-1)</span>
<a name="l00467"></a>00467                                 $content = $this-&gt;pObj-&gt;convertHTMLToUtf8($content, 'iso-8859-1');
<a name="l00468"></a>00468                                 $contentArr = $this-&gt;pObj-&gt;splitRegularContent($content);
<a name="l00469"></a>00469                                 $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00470"></a>00470                         <span class="keywordflow">break</span>;
<a name="l00471"></a>00471                         <span class="keywordflow">case</span> 'html':
<a name="l00472"></a>00472                         <span class="keywordflow">case</span> 'htm':
<a name="l00473"></a>00473                                 $fileContent = t3lib_div::getUrl($absFile);
<a name="l00474"></a>00474                                 $fileContent = $this-&gt;pObj-&gt;convertHTMLToUtf8($fileContent);
<a name="l00475"></a>00475                                 $contentArr = $this-&gt;pObj-&gt;splitHTMLContent($fileContent);
<a name="l00476"></a>00476                         <span class="keywordflow">break</span>;
<a name="l00477"></a>00477                         <span class="keywordflow">case</span> 'xml':             <span class="comment">// PHP strip-tags()</span>
<a name="l00478"></a>00478                                 $fileContent = t3lib_div::getUrl($absFile);
<a name="l00479"></a>00479 
<a name="l00480"></a>00480                                         <span class="comment">// Finding charset:</span>
<a name="l00481"></a>00481                                 eregi('^[[:space:]]*&lt;\?xml[^&gt;]+encoding[[:space:]]*=[[:space:]]*[<span class="stringliteral">"\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["</span>\<span class="charliteral">']'</span>,substr($fileContent,0,200),$reg);
<a name="l00482"></a>00482                                 $charset = $reg[1] ? $this-&gt;pObj-&gt;csObj-&gt;parse_charset($reg[1]) : 'utf-8';
<a name="l00483"></a>00483 
<a name="l00484"></a>00484                                         <span class="comment">// Converting content:</span>
<a name="l00485"></a>00485                                 $fileContent = $this-&gt;pObj-&gt;convertHTMLToUtf8(strip_tags(str_replace(<span class="charliteral">'&lt;'</span>,' &lt;',$fileContent)), $charset);
<a name="l00486"></a>00486                                 $contentArr = $this-&gt;pObj-&gt;splitRegularContent($fileContent);
<a name="l00487"></a>00487                                 $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00488"></a>00488                         <span class="keywordflow">break</span>;
<a name="l00489"></a>00489                         <span class="keywordflow">case</span> 'jpg':             <span class="comment">// PHP EXIF</span>
<a name="l00490"></a>00490                         <span class="keywordflow">case</span> 'jpeg':    <span class="comment">// PHP EXIF</span>
<a name="l00491"></a>00491                         <span class="keywordflow">case</span> 'tif':             <span class="comment">// PHP EXIF</span>
<a name="l00492"></a>00492                                 $exif = exif_read_data($absFile, 'IFD0');
<a name="l00493"></a>00493                                 <span class="keywordflow">if</span> ($exif)      {
<a name="l00494"></a>00494                                         $comment = trim($exif['COMMENT'][0].<span class="charliteral">' '</span>.$exif['ImageDescription']);     <span class="comment">// The comments in JPEG files are utf-8, while in Tif files they are 7-bit ascii.</span>
<a name="l00495"></a>00495                                 } <span class="keywordflow">else</span> {
<a name="l00496"></a>00496                                         $comment = '';
<a name="l00497"></a>00497                                 }
<a name="l00498"></a>00498                                 $contentArr = $this-&gt;pObj-&gt;splitRegularContent($comment);
<a name="l00499"></a>00499                                 $contentArr['title'] = basename($absFile);      <span class="comment">// Make sure the title doesn't expose the absolute path!</span>
<a name="l00500"></a>00500                         <span class="keywordflow">break</span>;
<a name="l00501"></a>00501                         <span class="keywordflow">default</span>:
<a name="l00502"></a>00502                                 <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l00503"></a>00503                         <span class="keywordflow">break</span>;
<a name="l00504"></a>00504                 }
<a name="l00505"></a>00505                         <span class="comment">// If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.</span>
<a name="l00506"></a>00506                 <span class="keywordflow">if</span> (is_array($contentArr) &amp;&amp; !$contentArr['title'])     {
<a name="l00507"></a>00507                         $contentArr['title'] = str_replace(<span class="charliteral">'_'</span>,<span class="charliteral">' '</span>,basename($absFile)); <span class="comment">// Replacing "_" with " " because many filenames use an underscore instead of a space char.</span>
<a name="l00508"></a>00508                 }
<a name="l00509"></a>00509 
<a name="l00510"></a>00510                 <span class="keywordflow">return</span> $contentArr;
<a name="l00511"></a>00511         }
<a name="l00512"></a>00512 
<a name="l00521"></a><a class="code" href="classtx__indexed__search__extparse.html#c249a5a5534b0ffb8db0ef31d552c9c6">00521</a>         function <a class="code" href="classtx__indexed__search__extparse.html#c249a5a5534b0ffb8db0ef31d552c9c6">fileContentParts</a>($ext,$absFile)        {
<a name="l00522"></a>00522                 $cParts = array(0);
<a name="l00523"></a>00523                 <span class="keywordflow">switch</span> ($ext)   {
<a name="l00524"></a>00524                         <span class="keywordflow">case</span> 'pdf':
<a name="l00525"></a>00525                                         <span class="comment">// Getting pdf-info:</span>
<a name="l00526"></a>00526                                 $cmd = $this-&gt;app['pdfinfo'].' <span class="stringliteral">"'.$absFile.'"</span>';
<a name="l00527"></a>00527                                 exec($cmd,$res);
<a name="l00528"></a>00528                                 $pdfInfo = $this-&gt;<a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">splitPdfInfo</a>($res);
<a name="l00529"></a>00529 
<a name="l00530"></a>00530                                 <span class="keywordflow">if</span> (intval($pdfInfo['pages']))  {
<a name="l00531"></a>00531                                         $cParts = array();
<a name="l00532"></a>00532 
<a name="l00533"></a>00533                                                 <span class="comment">// Calculate mode</span>
<a name="l00534"></a>00534                                         <span class="keywordflow">if</span> ($this-&gt;pdf_mode&gt;0)  {
<a name="l00535"></a>00535                                                 $iter = ceil($pdfInfo['pages']/$this-&gt;pdf_mode);
<a name="l00536"></a>00536                                         } <span class="keywordflow">else</span> {
<a name="l00537"></a>00537                                                 $iter = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>(abs($this-&gt;pdf_mode),1,$pdfInfo['pages']);
<a name="l00538"></a>00538                                         }
<a name="l00539"></a>00539 
<a name="l00540"></a>00540                                                 <span class="comment">// Traverse and create intervals.</span>
<a name="l00541"></a>00541                                         <span class="keywordflow">for</span> ($a=0;$a&lt;$iter;$a++)        {
<a name="l00542"></a>00542                                                 $low = floor($a*($pdfInfo['pages']/$iter))+1;
<a name="l00543"></a>00543                                                 $high = floor(($a+1)*($pdfInfo['pages']/$iter));
<a name="l00544"></a>00544                                                 $cParts[] = $low.<span class="charliteral">'-'</span>.$high;
<a name="l00545"></a>00545                                         }
<a name="l00546"></a>00546                                 }
<a name="l00547"></a>00547                         <span class="keywordflow">break</span>;
<a name="l00548"></a>00548                 }
<a name="l00549"></a>00549                 <span class="keywordflow">return</span> $cParts;
<a name="l00550"></a>00550         }
<a name="l00551"></a>00551 
<a name="l00560"></a><a class="code" href="classtx__indexed__search__extparse.html#b410aa49a477e84b1ea78e5287a6a121">00560</a>         function splitPdfInfo($pdfInfoArray)    {
<a name="l00561"></a>00561                 $res = array();
<a name="l00562"></a>00562                 <span class="keywordflow">if</span> (is_array($pdfInfoArray))    {
<a name="l00563"></a>00563                         foreach($pdfInfoArray as $line) {
<a name="l00564"></a>00564                                 $parts = explode(<span class="charliteral">':'</span>,$line,2);
<a name="l00565"></a>00565                                 <span class="keywordflow">if</span> (count($parts)&gt;1 &amp;&amp; trim($parts[0])) {
<a name="l00566"></a>00566                                         $res[strtolower(trim($parts[0]))] = trim($parts[1]);
<a name="l00567"></a>00567                                 }
<a name="l00568"></a>00568                         }
<a name="l00569"></a>00569                 }
<a name="l00570"></a>00570                 <span class="keywordflow">return</span> $res;
<a name="l00571"></a>00571         }
<a name="l00572"></a>00572 
<a name="l00579"></a><a class="code" href="classtx__indexed__search__extparse.html#89b8bacb2ad8c39422fdc87e8db8e3b6">00579</a>         function removeEndJunk($string) {
<a name="l00580"></a>00580                 <span class="keywordflow">return</span> trim(ereg_replace(<span class="charliteral">'['</span>.chr(10).chr(12).']*$<span class="charliteral">','</span>',$string));
<a name="l00581"></a>00581         }
<a name="l00582"></a>00582 
<a name="l00583"></a>00583 
<a name="l00584"></a>00584 
<a name="l00585"></a>00585 
<a name="l00586"></a>00586 
<a name="l00587"></a>00587 
<a name="l00588"></a>00588 
<a name="l00589"></a>00589 
<a name="l00590"></a>00590 
<a name="l00591"></a>00591 
<a name="l00592"></a>00592 
<a name="l00593"></a>00593 
<a name="l00594"></a>00594         <span class="comment">/************************</span>
<a name="l00595"></a>00595 <span class="comment">         *</span>
<a name="l00596"></a>00596 <span class="comment">         * Backend analyzer</span>
<a name="l00597"></a>00597 <span class="comment">         *</span>
<a name="l00598"></a>00598 <span class="comment">         ************************/</span>
<a name="l00599"></a>00599 
<a name="l00606"></a><a class="code" href="classtx__indexed__search__extparse.html#72c4e6ebab1832eeb7bcba1c2d59b585">00606</a>         function getIcon($extension)    {
<a name="l00607"></a>00607                 <span class="keywordflow">if</span> ($extension=='htm')  $extension = 'html';
<a name="l00608"></a>00608                 <span class="keywordflow">if</span> ($extension=='jpeg') $extension = 'jpg';
<a name="l00609"></a>00609                 <span class="keywordflow">return</span> 'EXT:indexed_search/pi/res/'.$extension.'.gif';
<a name="l00610"></a>00610         }
<a name="l00611"></a>00611 }
<a name="l00612"></a>00612 
<a name="l00613"></a>00613 <span class="keywordflow">if</span> (defined('TYPO3_MODE') &amp;&amp; $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.external_parser.php'])    {
<a name="l00614"></a>00614     include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.external_parser.php']);
<a name="l00615"></a>00615 }
<a name="l00616"></a>00616 ?&gt;
</pre></div><?php
  // Re-include the shared doc-site helpers; include_once makes this a no-op
  // when the header block at the top of this page has already loaded them.
  include_once '../doc-typo3-funcs.php';
  // Presumably emits the common page footer, mirroring get_header() at the
  // top of the page -- confirm against doc-typo3-funcs.php.
  get_footer();
?>