TYPO3 3.8.1: typo3_src-3.8.1/typo3/sysext/indexed_search/class.external_parser.php

00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
/**
 * External file parsers for the "indexed_search" extension.
 *
 * For each supported file extension this class knows which external tool (or
 * PHP facility) extracts the text, checks whether that tool is configured and
 * present (initParser), and reads the file content into the content array
 * produced by the parent indexer's split methods (readFileContent).
 *
 * The parent indexer object must be assigned to $pObj before initParser() or
 * readFileContent() is called, because logging and content splitting are
 * delegated to it.
 */
class tx_indexed_search_extparse {

    /**
     * PDF splitting mode. Replaced by the "pdf_mode" extension setting
     * (clamped to -100..100) when initParser('pdf') succeeds.
     *  - positive: target number of pages per part; the document is divided
     *    evenly into ceil(pages / pdf_mode) parts (12 pages, mode 5 => 1-4, 5-8, 9-12)
     *  - zero: the whole document is one part
     *  - negative: abs(pdf_mode) is the number of parts, capped at the page
     *    count (10 pages, mode -3 => 1-3, 4-6, 7-10)
     */
    var $pdf_mode = -20;

    /** Paths/flags of the external tools, filled in by initParser(). */
    var $app = array();

    /** Map from file extension to the common "item type" (e.g. htm => html). */
    var $ext2itemtype_map = array();

    /** Extensions for which initParser() succeeded: extension => TRUE. */
    var $supportedExtensions = array();

    /** Reference to parent object (indexer class). */
    var $pObj;

    /**
     * Reads the serialized extension configuration of indexed_search.
     *
     * The value comes from the installation's own configuration array, not
     * from user input, which is why unserialize() is acceptable here.
     *
     * @return array Configuration array; empty if nothing usable is configured.
     * @access private
     */
    function getIndexerConfig() {
        $config = FALSE;
        if (isset($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'])) {
            $config = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
        }
        return is_array($config) ? $config : array();
    }

    /**
     * Tells whether an extension is on the configured "ignoreExtensions" list.
     *
     * @param string $extension File extension
     * @param array $indexerConfig Configuration from getIndexerConfig()
     * @return boolean TRUE if the extension must be ignored
     * @access private
     */
    function isIgnoredExtension($extension, $indexerConfig) {
            // An empty list can never match, so the split is skipped entirely.
        if (empty($indexerConfig['ignoreExtensions'])) {
            return FALSE;
        }
        $ignoreExtensions = t3lib_div::trimExplode(',', strtolower($indexerConfig['ignoreExtensions']), 1);
        return in_array($extension, $ignoreExtensions);
    }

    /**
     * Normalizes a configured tool directory so it ends with exactly the one
     * slash appended here (a single trailing slash in the input is dropped first).
     *
     * @param string $path Configured directory
     * @return string Directory with trailing slash
     * @access private
     */
    function toolDirectory($path) {
            // "D" makes "$" match only at the very end, like the POSIX regex it replaces.
        return preg_replace('#/$#D', '', (string)$path).'/';
    }

    /**
     * Initializes the parser for one file extension: verifies that the needed
     * tool is enabled in the configuration and (unless safe_mode is on, where
     * is_file() cannot be relied upon) exists on disk. On success the tool
     * paths are stored in $this->app and the extension is registered in
     * $this->supportedExtensions and $this->ext2itemtype_map.
     *
     * @param string $extension File extension (lowercase, without dot)
     * @return boolean TRUE if the extension is supported; FALSE if it is on the
     *                 ignore list; nothing (NULL) if it is unsupported or its tool is missing.
     */
    function initParser($extension) {
        $indexerConfig = $this->getIndexerConfig();

            // If windows, apply extension to tool name:
        $exe = (TYPO3_OS == 'WIN') ? '.exe' : '';
        $extOK = FALSE;
        $mainExtension = '';

            // Ignore extensions
        if ($this->isIgnoredExtension($extension, $indexerConfig)) {
            $this->pObj->log_setTSlogMessage('Extension "'.$extension.'" was set to be ignored.',1);
            return FALSE;
        }

            // Switch on file extension:
        switch ($extension) {
            case 'pdf':
                if (!empty($indexerConfig['pdftools'])) {
                    $pdfPath = $this->toolDirectory($indexerConfig['pdftools']);
                    if (ini_get('safe_mode') || (@is_file($pdfPath.'pdftotext'.$exe) && @is_file($pdfPath.'pdfinfo'.$exe))) {
                        $this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
                        $this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
                            // PDF mode (an unset value counts as 0, i.e. "whole file in one part"):
                        $this->pdf_mode = t3lib_div::intInRange(isset($indexerConfig['pdf_mode']) ? $indexerConfig['pdf_mode'] : 0, -100, 100);
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("PDF tools was not found in paths '".$pdfPath."pdftotext' and/or '".$pdfPath."pdfinfo'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('PDF tools disabled',1);
                }
                break;
            case 'doc':
                    // catdoc
                if (!empty($indexerConfig['catdoc'])) {
                    $catdocPath = $this->toolDirectory($indexerConfig['catdoc']);
                    if (ini_get('safe_mode') || @is_file($catdocPath.'catdoc'.$exe)) {
                        $this->app['catdoc'] = $catdocPath.'catdoc'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'catdoc' tool for reading Word-files was not found in paths '".$catdocPath."catdoc'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('catdoc tools (Word-files) disabled',1);
                }
                break;
            case 'pps':     // MS PowerPoint(?)
            case 'ppt':     // MS PowerPoint
                    // ppthtml
                if (!empty($indexerConfig['ppthtml'])) {
                    $ppthtmlPath = $this->toolDirectory($indexerConfig['ppthtml']);
                    if (ini_get('safe_mode') || @is_file($ppthtmlPath.'ppthtml'.$exe)) {
                        $this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'ppthtml' tool for reading Powerpoint-files was not found in paths '".$ppthtmlPath."ppthtml'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('ppthtml tools (Powerpoint-files) disabled',1);
                }
                break;
            case 'xls':     // MS Excel
                    // xlhtml
                if (!empty($indexerConfig['xlhtml'])) {
                    $xlhtmlPath = $this->toolDirectory($indexerConfig['xlhtml']);
                    if (ini_get('safe_mode') || @is_file($xlhtmlPath.'xlhtml'.$exe)) {
                        $this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'xlhtml' tool for reading Excel-files was not found in paths '".$xlhtmlPath."xlhtml'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('xlhtml tools (Excel-files) disabled',1);
                }
                break;
            case 'sxc':     // Open Office Calc.
            case 'sxi':     // Open Office Impress
            case 'sxw':     // Open Office Writer
                    // ooo_extract.rb can be found at: http://www.math.umd.edu/~dcarrera/openoffice/misc/tools/ooo_extract.html
                    // On Debian these had to be installed before ooo_extract.rb would run:
                    //      apt-get install libzlib-ruby1.8
                    //      apt-get install librexml-ruby1.8
                if (!empty($indexerConfig['nativeOOMethod'])) {
                    if (t3lib_extMgm::isLoaded('libunzipped')) {
                        $this->app['nativeOOMethod'] = TRUE;
                        $extOK = TRUE;
                        $this->pObj->log_setTSlogMessage('Using "libunzipped" for extraction of Open Office files, "'.$extension.'".',1);
                    } else {
                        $this->pObj->log_setTSlogMessage('The extension "libunzipped" was not loaded (for extraction of Open Office files, "'.$extension.'")',2);
                    }
                } elseif (!empty($indexerConfig['OOoExtract'])) {
                        // The ruby directory is optional; when it is empty, "ruby" is called without a path.
                    $rubyPath = '';
                    if (!empty($indexerConfig['ruby'])) {
                        $rubyPath = $this->toolDirectory($indexerConfig['ruby']);
                    }
                    $oooExPath = $this->toolDirectory($indexerConfig['OOoExtract']);

                    $rubyFound = $rubyPath ? @is_file($rubyPath.'ruby'.$exe) : TRUE;
                    if (ini_get('safe_mode') || ($rubyFound && @is_file($oooExPath.'ooo_extract.rb'))) {
                        $this->app['ruby'] = $rubyPath.'ruby'.$exe;
                        $this->app['OOo'] = $oooExPath.'ooo_extract.rb';
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'Ruby and OOo_extract' tools for reading OpenOffice.org documents were not found in paths '".$rubyPath."ruby".$exe."' OR '".$oooExPath."ooo_extract.rb'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('Ruby & OOo_extract tools (OpenOffice-files) disabled',1);
                }
                break;
            case 'rtf':
                    // unrtf
                if (!empty($indexerConfig['unrtf'])) {
                    $unrtfPath = $this->toolDirectory($indexerConfig['unrtf']);
                    if (ini_get('safe_mode') || @is_file($unrtfPath.'unrtf'.$exe)) {
                        $this->app['unrtf'] = $unrtfPath.'unrtf'.$exe;
                        $extOK = TRUE;
                    } else {
                        $this->pObj->log_setTSlogMessage("'unrtf' tool for reading RTF-files was not found in paths '".$unrtfPath."unrtf'",3);
                    }
                } else {
                    $this->pObj->log_setTSlogMessage('unrtf tool (RTF-files) disabled',1);
                }
                break;
            case 'txt':     // Raw text
            case 'csv':     // Raw text
            case 'xml':     // PHP strip_tags()
            case 'tif':     // PHP EXIF
                $extOK = TRUE;
                break;
            case 'html':    // PHP strip_tags()
            case 'htm':     // PHP strip_tags()
                $extOK = TRUE;
                $mainExtension = 'html';        // making "html" the common "item_type"
                break;
            case 'jpg':     // PHP EXIF
            case 'jpeg':    // PHP EXIF
                $extOK = TRUE;
                $mainExtension = 'jpeg';        // making "jpeg" the common item_type
                break;
        }

            // If extension was OK:
        if ($extOK) {
            $this->supportedExtensions[$extension] = TRUE;
            $this->ext2itemtype_map[$extension] = $mainExtension ? $mainExtension : $extension;
            return TRUE;
        }
    }

    /**
     * Cheap check whether this class knows the extension at all. No
     * configuration is read and no tool is looked up.
     *
     * @param string $extension File extension
     * @return boolean TRUE for a known extension, otherwise nothing (NULL).
     */
    function softInit($extension) {
        switch ($extension) {
            case 'pdf':     // PDF
            case 'doc':     // MS Word files
            case 'pps':     // MS PowerPoint
            case 'ppt':     // MS PowerPoint
            case 'xls':     // MS Excel
            case 'sxc':     // Open Office Calc.
            case 'sxi':     // Open Office Impress
            case 'sxw':     // Open Office Writer
            case 'rtf':     // RTF documents
            case 'txt':     // ASCII Text documents
            case 'html':    // HTML
            case 'htm':     // HTML
            case 'csv':     // Comma Separated Values
            case 'xml':     // Generic XML
            case 'jpg':     // Jpeg images (EXIF comment)
            case 'jpeg':    // Jpeg images (EXIF comment)
            case 'tif':     // TIF images (EXIF comment)
                return TRUE;
        }
    }

    /**
     * Returns the display title of the media type for an extension, provided
     * the matching parser is enabled in the configuration.
     *
     * @param string $extension File extension
     * @return mixed Title string; FALSE if the extension is on the ignore list;
     *               nothing (NULL) if the parser is disabled or the extension is a
     *               duplicate ("htm", "jpg") or unknown.
     */
    function searchTypeMediaTitle($extension) {
        $indexerConfig = $this->getIndexerConfig();

            // Ignore extensions
        if ($this->isIgnoredExtension($extension, $indexerConfig)) {
            return FALSE;
        }

            // Switch on file extension:
        switch ($extension) {
            case 'pdf':
                if (!empty($indexerConfig['pdftools'])) {
                    return 'PDF';
                }
                break;
            case 'doc':
                if (!empty($indexerConfig['catdoc'])) {
                    return 'MS Word';
                }
                break;
            case 'pps':     // MS PowerPoint(?)
            case 'ppt':     // MS PowerPoint
                if (!empty($indexerConfig['ppthtml'])) {
                    return 'MS Powerpoint';
                }
                break;
            case 'xls':     // MS Excel
                if (!empty($indexerConfig['xlhtml'])) {
                    return 'MS Excel';
                }
                break;
            case 'sxc':     // Open Office Calc.
            case 'sxi':     // Open Office Impress
            case 'sxw':     // Open Office Writer
                    // Same condition as initParser(): the ruby method is switched on by
                    // "OOoExtract"; the "ruby" directory itself is optional.
                if (!empty($indexerConfig['nativeOOMethod']) || !empty($indexerConfig['OOoExtract'])) {
                    return 'Open Office';
                }
                break;
            case 'rtf':
                if (!empty($indexerConfig['unrtf'])) {
                    return 'RTF';
                }
                break;
            case 'html':    // PHP strip_tags()
            case 'jpeg':    // PHP EXIF
            case 'txt':     // Raw text
            case 'csv':     // Raw text
            case 'xml':     // PHP strip_tags()
            case 'tif':     // PHP EXIF
                return strtoupper($extension);
                // NO entry (duplicates or blank):
            case 'htm':
            case 'jpg':
            default:
                break;
        }
    }

    /**
     * Tells whether files of this type are indexed in several parts (page ranges).
     *
     * @param string $extension File extension
     * @return boolean TRUE for "pdf", otherwise nothing (NULL).
     */
    function isMultiplePageExtension($extension) {
        if ((string)$extension == 'pdf') {
            return TRUE;
        }
    }


    /************************
     *
     * Reading documents (for parsing)
     *
     ************************/

    /**
     * Reads the content of a file and returns it as the content array built by
     * the parent indexer's splitRegularContent()/splitHTMLContent().
     *
     * File names are passed to the external tools through escapeshellarg(), so
     * names with spaces or shell metacharacters are handled safely. Note that
     * escapeshellarg() depends on the current locale (LC_CTYPE) for non-ASCII
     * file names.
     *
     * @param string $ext File extension; must have been accepted by initParser()
     * @param string $absFile Absolute path of the file
     * @param string $cPKey Part key from fileContentParts(); for PDF "low-high"
     *                      page numbers (a single number selects that one page)
     * @return mixed Content array; FALSE if the extension is not supported;
     *               nothing (NULL) if the tool for the extension is not set up
     *               or no content could be read.
     */
    function readFileContent($ext,$absFile,$cPKey) {
        $contentArr = NULL;

            // Return immediately if initialization didn't set support up:
        if (empty($this->supportedExtensions[$ext])) {
            return FALSE;
        }

        $fileArg = escapeshellarg($absFile);

            // Switch by file extension
        switch ($ext) {
            case 'pdf':
                if (!empty($this->app['pdfinfo'])) {
                        // Getting pdf-info:
                    $infoLines = array();
                    exec($this->app['pdfinfo'].' '.$fileArg, $infoLines);
                    $pdfInfo = $this->splitPdfInfo($infoLines);
                    if (isset($pdfInfo['pages']) && intval($pdfInfo['pages'])) {
                            // Page numbers are forced to integers before they reach the command line.
                        $range = explode('-', $cPKey);
                        $low = intval($range[0]);
                        $high = isset($range[1]) ? intval($range[1]) : $low;

                            // Get pdf content:
                        $tempFileName = t3lib_div::tempnam('Typo3_indexer');    // Create temporary name
                        @unlink($tempFileName);     // Delete if exists, so its presence afterwards proves pdftotext wrote it.
                        $cmd = $this->app['pdftotext'].' -f '.$low.' -l '.$high.' -enc UTF-8 -q '.$fileArg.' '.escapeshellarg($tempFileName);
                        $output = array();
                        exec($cmd, $output);

                        $content = '';
                        if (@is_file($tempFileName)) {
                            $content = t3lib_div::getUrl($tempFileName);
                            unlink($tempFileName);
                        } else {
                            $this->pObj->log_setTSlogMessage('PDFtoText Failed on this document: '.$absFile.". Maybe the PDF file is locked for printing or encrypted.",2);
                        }
                        $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                    }
                }
                break;
            case 'doc':
                if (!empty($this->app['catdoc'])) {
                    $output = array();
                    exec($this->app['catdoc'].' -d utf-8 '.$fileArg, $output);
                    $content = implode(chr(10), $output);
                    $contentArr = $this->pObj->splitRegularContent($this->removeEndJunk($content));
                }
                break;
            case 'pps':
            case 'ppt':
                if (!empty($this->app['ppthtml'])) {
                    $output = array();
                    exec($this->app['ppthtml'].' '.$fileArg, $output);
                    $content = $this->pObj->convertHTMLToUtf8(implode(chr(10), $output));
                    $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                    $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                }
                break;
            case 'xls':
                if (!empty($this->app['xlhtml'])) {
                    $output = array();
                    exec($this->app['xlhtml'].' -nc -te '.$fileArg, $output);
                    $content = $this->pObj->convertHTMLToUtf8(implode(chr(10), $output));
                    $contentArr = $this->pObj->splitHTMLContent($this->removeEndJunk($content));
                    $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                }
                break;
            case 'sxi':
            case 'sxc':
            case 'sxw':
                if (!empty($this->app['nativeOOMethod'])) {
                    if (t3lib_extMgm::isLoaded('libunzipped')) {
                            // NOTE(review): the included file runs in this function's scope;
                            // presumably it reads $TYPO3_CONF_VARS (e.g. for its own XCLASS
                            // check), hence the global declaration - confirm before removing.
                        global $TYPO3_CONF_VARS;
                        require_once(t3lib_extMgm::extPath('libunzipped').'class.tx_libunzipped.php');

                            // Initialize Unzip object:
                        $unzip = t3lib_div::makeInstance('tx_libunzipped');
                        $ooFiles = $unzip->init($absFile);
                        if (is_array($ooFiles)) {
                                // Read content.xml; a space is put before every tag so words in
                                // adjacent elements don't run together once the tags are stripped.
                            $content_xml = $unzip->getFileFromArchive('content.xml');
                            $meta_xml = $unzip->getFileFromArchive('meta.xml');
                            $utf8_content = trim(strip_tags(str_replace('<',' <',$content_xml['content'])));
                            $contentArr = $this->pObj->splitRegularContent($utf8_content);
                            $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!

                                // Meta information
                            $metaContent = t3lib_div::xml2tree($meta_xml['content']);
                            $metaContent = $metaContent['office:document-meta'][0]['ch']['office:meta'][0]['ch'];
                            if (is_array($metaContent)) {
                                $contentArr['title'] = $metaContent['dc:title'][0]['values'][0] ? $metaContent['dc:title'][0]['values'][0] : $contentArr['title'];
                                $contentArr['description'] = $metaContent['dc:subject'][0]['values'][0].' '.$metaContent['dc:description'][0]['values'][0];

                                    // Keywords collected:
                                if (is_array($metaContent['meta:keywords'][0]['ch']['meta:keyword'])) {
                                    if (!isset($contentArr['keywords'])) {
                                        $contentArr['keywords'] = '';
                                    }
                                    foreach ($metaContent['meta:keywords'][0]['ch']['meta:keyword'] as $kwDat) {
                                        $contentArr['keywords'].= $kwDat['values'][0].' ';
                                    }
                                }
                            }
                        }
                    }
                } elseif (!empty($this->app['ruby'])) {
                    $extractCmd = $this->app['ruby'].' '.$this->app['OOo'];

                        // Extracting document headers:
                    $headings = array();
                    exec($extractCmd.' --heading '.$fileArg, $headings);

                        // Extracting document text:
                    $texts = array();
                    exec($extractCmd.' '.$fileArg, $texts);

                    $content = implode(chr(10),$headings).' '.implode(chr(10),$texts);
                    $contentArr = $this->pObj->splitRegularContent($content);
                    $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                }
                break;
            case 'rtf':
                if (!empty($this->app['unrtf'])) {
                    $output = array();
                    exec($this->app['unrtf'].' '.$fileArg, $output);
                    $fileContent = $this->pObj->convertHTMLToUtf8(implode(chr(10), $output));
                    $contentArr = $this->pObj->splitHTMLContent($fileContent);
                }
                break;
            case 'txt':
            case 'csv':     // Raw text
                $content = t3lib_div::getUrl($absFile);
                    // TODO: Auto-registration of charset???? -> utf-8 (Current assuming western europe...)
                $content = $this->pObj->convertHTMLToUtf8($content, 'iso-8859-1');
                $contentArr = $this->pObj->splitRegularContent($content);
                $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                break;
            case 'html':
            case 'htm':
                $fileContent = t3lib_div::getUrl($absFile);
                $fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
                $contentArr = $this->pObj->splitHTMLContent($fileContent);
                break;
            case 'xml':     // PHP strip_tags()
                $fileContent = t3lib_div::getUrl($absFile);

                    // Finding charset in the XML declaration (first 200 bytes); default is utf-8:
                $reg = array();
                preg_match('/^[[:space:]]*<\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i', substr($fileContent,0,200), $reg);
                $charset = !empty($reg[1]) ? $this->pObj->csObj->parse_charset($reg[1]) : 'utf-8';

                    // Converting content:
                $fileContent = $this->pObj->convertHTMLToUtf8(strip_tags(str_replace('<',' <',$fileContent)), $charset);
                $contentArr = $this->pObj->splitRegularContent($fileContent);
                $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                break;
            case 'jpg':     // PHP EXIF
            case 'jpeg':    // PHP EXIF
            case 'tif':     // PHP EXIF
                $comment = '';
                    // Without the exif extension the image is indexed by file name only.
                if (function_exists('exif_read_data')) {
                    $exif = exif_read_data($absFile, 'IFD0');
                    if ($exif) {
                            // The comments in JPEG files are utf-8, while in Tif files they are 7-bit ascii.
                        $jpegComment = isset($exif['COMMENT'][0]) ? $exif['COMMENT'][0] : '';
                        $description = isset($exif['ImageDescription']) ? $exif['ImageDescription'] : '';
                        $comment = trim($jpegComment.' '.$description);
                    }
                }
                $contentArr = $this->pObj->splitRegularContent($comment);
                $contentArr['title'] = basename($absFile);      // Make sure the title doesn't expose the absolute path!
                break;
            default:
                return FALSE;
        }

            // If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.
        if (is_array($contentArr) && empty($contentArr['title'])) {
            $contentArr['title'] = str_replace('_',' ',basename($absFile)); // Substituting "_" for " " because many filenames may have this instead of a space char.
        }

        return $contentArr;
    }

    /**
     * Returns the list of content part keys a file is indexed in. For PDF
     * files with a readable page count these are "low-high" page ranges
     * according to $this->pdf_mode; for everything else the single key 0.
     *
     * @param string $ext File extension
     * @param string $absFile Absolute path of the file
     * @return array Part keys, e.g. array('1-3', '4-6', '7-10') or array(0)
     */
    function fileContentParts($ext,$absFile) {
        $cParts = array(0);

        if ($ext == 'pdf' && !empty($this->app['pdfinfo'])) {
                // Getting pdf-info:
            $infoLines = array();
            exec($this->app['pdfinfo'].' '.escapeshellarg($absFile), $infoLines);
            $pdfInfo = $this->splitPdfInfo($infoLines);
            $pages = isset($pdfInfo['pages']) ? intval($pdfInfo['pages']) : 0;

            if ($pages) {
                $cParts = array();

                    // Calculate the number of parts from the mode:
                if ($this->pdf_mode > 0) {
                    $iter = intval(ceil($pages / $this->pdf_mode));
                } else {
                    $iter = t3lib_div::intInRange(abs($this->pdf_mode), 1, $pages);
                }

                    // Traverse and create intervals. Multiplying before dividing keeps
                    // the boundaries exact; the last interval always ends on $pages.
                for ($a = 0; $a < $iter; $a++) {
                    $low = intval(floor(($a * $pages) / $iter)) + 1;
                    $high = intval(floor((($a + 1) * $pages) / $iter));
                    $cParts[] = $low.'-'.$high;
                }
            }
        }

        return $cParts;
    }

    /**
     * Turns the output lines of "pdfinfo" ("Key: value") into an array with
     * lowercased, trimmed keys. Lines without a colon or with an empty key are
     * skipped; only the first colon separates key from value.
     *
     * @param array $pdfInfoArray Output lines from pdfinfo
     * @return array Key/value pairs, e.g. array('pages' => '12')
     */
    function splitPdfInfo($pdfInfoArray) {
        $res = array();
        if (is_array($pdfInfoArray)) {
            foreach ($pdfInfoArray as $line) {
                $parts = explode(':', $line, 2);
                if (count($parts) > 1 && trim($parts[0])) {
                    $res[strtolower(trim($parts[0]))] = trim($parts[1]);
                }
            }
        }
        return $res;
    }

    /**
     * Removes the trailing run of line feeds and form feeds that the converter
     * tools leave at the end of their output, then trims ordinary whitespace
     * from both ends.
     *
     * @param string $string Raw tool output
     * @return string Cleaned string
     */
    function removeEndJunk($string) {
            // trim() alone would not do: form feed (chr(12)) is not in its default character list.
        return trim(rtrim((string)$string, chr(10).chr(12)));
    }


    /************************
     *
     * Backend analyzer
     *
     ************************/

    /**
     * Returns the "EXT:" resource path of the icon for a file extension.
     * "htm" shares the icon of "html" and "jpeg" that of "jpg".
     *
     * @param string $extension File extension
     * @return string Icon path, e.g. "EXT:indexed_search/pi/res/pdf.gif"
     */
    function getIcon($extension) {
        if ($extension == 'htm') {
            $extension = 'html';
        } elseif ($extension == 'jpeg') {
            $extension = 'jpg';
        }
        return 'EXT:indexed_search/pi/res/'.$extension.'.gif';
    }
}
00633 
// XCLASS hook: when running inside TYPO3 and an extending class file is
// registered for this script, include it (once).
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php']);
    }
}
00637 ?>
Documentation TYPO3 par Ameos

class.external_parser.php