Documentation TYPO3 par Ameos |
00001 <?php 00002 /*************************************************************** 00003 * Copyright notice 00004 * 00005 * (c) 2001-2004 Kasper Skaarhoj (kasperYYYY@typo3.com) 00006 * All rights reserved 00007 * 00008 * This script is part of the TYPO3 project. The TYPO3 project is 00009 * free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 2 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * The GNU General Public License can be found at 00015 * http://www.gnu.org/copyleft/gpl.html. 00016 * A copy is found in the textfile GPL.txt and important notices to the license 00017 * from the author is found in LICENSE.txt distributed with these scripts. 00018 * 00019 * 00020 * This script is distributed in the hope that it will be useful, 00021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00023 * GNU General Public License for more details. 00024 * 00025 * This copyright notice MUST APPEAR in all copies of the script! 00026 ***************************************************************/ 00108 require_once(PATH_t3lib.'class.t3lib_htmlmail.php'); 00109 00110 00118 class tx_indexedsearch_indexer { 00119 00120 // Messages: 00121 var $reasons = array( 00122 -1 => 'mtime matched the document, so no changes detected and no content updated', 00123 -2 => 'The minimum age was not exceeded', 00124 1 => "The configured max-age was exceeded for the document and thus it's indexed.", 00125 2 => 'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.', 00126 3 => 'The minimum age was exceed, but mtime was not set, so the page was indexed.', 00127 4 => 'Page has never been indexed (is not represented in the index_phash table).' 
00128 ); 00129 var $convChars=array( 00130 'ÁÉÚÍÄËÜÖÏÆØÅ', 00131 'áéúíâêûôîæøå' 00132 ); 00133 00134 // HTML code blocks to exclude from indexing: 00135 var $excludeSections = 'script,style'; 00136 00137 // Supported Extensions for external files: 00138 var $supportedExtensions = array( 00139 'pdf' => 1, 00140 'doc' => 1, 00141 'txt' => 1, 00142 'html' => 1, 00143 'htm' => 1 00144 ); 00145 00146 // This value is also overridden from config. 00147 var $pdf_mode = -20; // zero: whole PDF file is indexed in one. positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,.... Negative integer would indicate (abs value) number of groups. Eg "3" groups of 10 pages would be 1-4,5-8,9-10 00148 00149 // This array is reset and configured in initialization: 00150 var $app = array( 00151 'pdftotext' => '/usr/local/bin/pdftotext', 00152 'pdfinfo' => '/usr/local/bin/pdfinfo', 00153 'catdoc' => '/usr/local/bin/catdoc' 00154 ); 00155 00156 // Fe-group list (pages might be indexed separately for each usergroup combination to support search in access limited pages!) 00157 var $defaultGrList='0,-1'; 00158 00159 // Min/Max times: 00160 var $tstamp_maxAge = 0; // If set, this tells a number of seconds that is the maximum age of an indexed document. Regardless of mtime the document will be re-indexed if this limit is exceeded. 00161 var $tstamp_minAge = 0; // If set, this tells a minimum limit before a document can be indexed again. This is regardless of mtime. 
00162 00163 // INTERNALS: 00164 var $defaultContentArray=array( 00165 'title' => '', 00166 'description' => '', 00167 'keywords' => '', 00168 'body' => '', 00169 ); 00170 var $wordcount = 0; 00171 var $Itypes = array( 00172 'html' => 1, 00173 'htm' => 1, 00174 'pdf' => 2, 00175 'doc' => 3, 00176 'txt' => 4 00177 ); 00178 var $conf = array(); // Configuration set internally 00179 var $hash = array(); // Hash array, contains phash and phash_grouping 00180 var $contentParts = array(); 00181 var $pObj = ''; // Parent object, reference to global TSFE 00182 var $content_md5h = ''; 00183 00184 var $cHashParams = array(); // cHashparams array 00185 var $mtime = 0; // If set, then the mtime of the document must be different in order to be indexed. 00186 var $rootLine = array(); // Root line from TSFE 00187 00188 var $freqRange = 65000; 00189 var $freqMax = 0.1; 00190 00191 00192 00193 00200 function hook_indexContent(&$pObj) { 00201 00202 if ($pObj->config['config']['index_enable']) { 00203 if (!$pObj->no_cache) { 00204 $GLOBALS['TT']->push('Index page',''); 00205 00206 // Setting parent object: 00207 $this->pObj = &$pObj; 00208 00209 // Init and start indexing: 00210 $this->init(); 00211 $this->indexTypo3PageContent(); 00212 $GLOBALS['TT']->pull(); 00213 } else { 00214 $GLOBALS['TT']->push('Index page',''); 00215 $GLOBALS['TT']->setTSlogMessage('Index page? No, page was set to "no_cache" and so cannot be indexed.'); 00216 $GLOBALS['TT']->pull(); 00217 } 00218 } 00219 } 00220 00221 00222 00223 00224 00225 00226 00227 00228 00229 00230 00231 /******************************** 00232 * 00233 * Initialization 00234 * 00235 *******************************/ 00236 00242 function init() { 00243 00244 // Initializing: 00245 $this->cHashParams = $this->pObj->cHash_array; 00246 if (is_array($this->cHashParams) && count($this->cHashParams)) { 00247 $this->cHashParams['cHash'] = $this->pObj->cHash; // Add this so that URL's come out right... 
00248 } 00249 00250 // Modification time of page and root line transferred: 00251 $this->mtime = $this->pObj->register['SYS_LASTCHANGED']; 00252 $this->rootLine = $this->pObj->config['rootLine']; 00253 00254 // Setting up internal configuration from config array: 00255 $this->conf = array(); 00256 $this->conf['index_externals'] = $this->pObj->config['config']['index_externals']; 00257 $this->conf['index_descrLgd'] = $this->pObj->config['config']['index_descrLgd']; 00258 00259 // Setting phash / phash_grouping which identifies the indexed page based on some of these variables: 00260 $this->setT3Hashes(); 00261 00262 // Initialize tools for reading PDF and Word documents: 00263 $this->initExternalReaders(); 00264 } 00265 00271 function initExternalReaders() { 00272 // PDF + WORD tools: 00273 // First reset the class default settings (disabling) 00274 $this->app = array(); 00275 $this->supportedExtensions['pdf'] = 0; 00276 $this->supportedExtensions['doc'] = 0; 00277 00278 // Then read indexer-config and set if appropriate: 00279 $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); 00280 00281 // PDF 00282 if ($indexerConfig['pdftools']) { 00283 $pdfPath = ereg_replace("\/$",'',$indexerConfig['pdftools']).'/'; 00284 if ((ini_get('safe_mode') && $pdfPath) || (@is_file($pdfPath.'pdftotext') && @is_file($pdfPath.'pdfinfo'))) { 00285 $this->app['pdfinfo'] = $pdfPath.'pdfinfo'; 00286 $this->app['pdftotext'] = $pdfPath.'pdftotext'; 00287 $this->supportedExtensions['pdf'] = 1; 00288 } else $GLOBALS['TT']->setTSlogMessage("PDF tools was not found in paths '".$pdfPath."pdftotext' and/or '".$pdfPath."pdfinfo'",3); 00289 } else $GLOBALS['TT']->setTSlogMessage('PDF tools disabled',1); 00290 00291 // Catdoc 00292 if ($indexerConfig['catdoc']) { 00293 $catdocPath = ereg_replace("\/$",'',$indexerConfig['catdoc']).'/'; 00294 if (is_file($catdocPath.'catdoc')) { 00295 $this->app['catdoc'] = $catdocPath.'catdoc'; 00296 
$this->supportedExtensions['doc'] = 1; 00297 } else $GLOBALS['TT']->setTSlogMessage("'catdoc' tool for reading Word-files was not found in paths '".$catdocPath."catdoc'",3); 00298 } else $GLOBALS['TT']->setTSlogMessage('catdoc tools (Word-files) disabled',1); 00299 00300 // PDF mode: 00301 $this->pdf_mode = t3lib_div::intInRange($indexerConfig['pdf_mode'],-100,100); 00302 } 00303 00304 00305 00306 00307 00308 00309 00310 00311 00312 00313 00314 /******************************** 00315 * 00316 * Indexing 00317 * 00318 *******************************/ 00319 00325 function indexTypo3PageContent() { 00326 00327 $check = $this->checkMtimeTstamp($this->mtime, $this->tstamp_maxAge, $this->tstamp_minAge, $this->hash['phash']); 00328 # WHAT IS THIS? Test that it works... $is_grlist = $this->is_grlist_set($phash_x); // Use $this->hash['phash']? 00329 00330 if ($check > 0 || !$is_grlist) { 00331 00332 // Setting message: 00333 if ($check > 0) { 00334 $GLOBALS['TT']->setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1); 00335 } else { 00336 $GLOBALS['TT']->setTSlogMessage('Indexing needed, reason: Updates gr_list!',1); 00337 } 00338 00339 // Divide into title,keywords,description and body: 00340 $GLOBALS['TT']->push('Split content',''); 00341 $this->contentParts = $this->splitHTMLContent($this->pObj->content); 00342 if ($this->pObj->indexedDocTitle) $this->contentParts['title'] = $this->pObj->indexedDocTitle; 00343 $GLOBALS['TT']->pull(); 00344 00345 // Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!) 00346 $this->content_md5h = $this->md5inthash(implode($this->contentParts,'')); 00347 // This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash. 
00348 // If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more. 00349 // This will also prevent pages from being indexed if a fe_users has logged in and it turns out that the page content is not changed anyway. fe_users logged in should always search with hash_gr_list = "0,-1" OR "[their_group_list]". This situation will be prevented only if the page has been indexed with no user login on before hand. Else the page will be indexed by users until that event. However that does not present a serious problem. 00350 $checkCHash = $this->checkContentHash(); 00351 if (!is_array($checkCHash)) { 00352 $Pstart=t3lib_div::milliseconds(); 00353 // Splitting words 00354 $GLOBALS['TT']->push('Extract words from content',''); 00355 $splitInWords = $this->procesWordsInArrays($this->contentParts); 00356 $GLOBALS['TT']->pull(); 00357 00358 // Analyse the indexed words. 00359 $GLOBALS['TT']->push('Analyse the extracted words',''); 00360 $indexArr = $this->indexAnalyze($splitInWords); 00361 $GLOBALS['TT']->pull(); 00362 00363 // Submitting page (phash) record 00364 $GLOBALS['TT']->push('Submitting page',''); 00365 $this->submitPage(); 00366 $GLOBALS['TT']->pull(); 00367 00368 // Check words and submit to word list if not there 00369 $GLOBALS['TT']->push('Check word list and submit words',''); 00370 $this->checkWordList($indexArr); 00371 $this->submitWords($indexArr,$this->hash['phash']); 00372 $GLOBALS['TT']->pull(); 00373 00374 // Set parsetime 00375 $this->updateParsetime($this->hash['phash'],t3lib_div::milliseconds()-$Pstart); 00376 00377 // Checking external files if configured for. 
$GLOBALS['TT']->push('Checking external files','');
				if ($this->conf['index_externals'])	{
					$this->extractLinks($this->pObj->content);
				}
				$GLOBALS['TT']->pull();
			} else {
				$this->updateTstamp($this->hash['phash'],$this->mtime);	// Update the timestamp
				$this->update_grlist($checkCHash['phash'],$this->hash['phash']);
				$this->updateRootline();
				$GLOBALS['TT']->setTSlogMessage('Indexing not needed, the contentHash, '.$this->content_md5h.', has not changed. Timestamp, grlist and rootline updated if necessary.');
			}
		} else {
			$GLOBALS['TT']->setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);
		}
	}

	/**
	 * Splits an HTML document into title, keywords, description and body text.
	 * The head is everything before the first "<body"; title and the keywords/description
	 * meta tags are read from it. The body is filtered through typoSearchTags(), sections listed
	 * in $this->excludeSections are cut out and all remaining tags are stripped.
	 *
	 * @param	string		HTML content
	 * @return	array		Array with the keys of $this->defaultContentArray (title, description, keywords, body)
	 */
	function splitHTMLContent($content) {
		$contentArr = $this->defaultContentArray;

			// Divide head from body. stristr() returns FALSE when there is no <body tag; the whole
			// document is then treated as head (substr($content,0,-0) used to yield an empty head).
		$bodyPart = stristr($content,'<body');
		if ($bodyPart === false) {
			$headPart = $content;
		} else {
			$contentArr['body'] = $bodyPart;
			$headPart = substr($content,0,-strlen($bodyPart));
		}

			// Get title. If the title contains a ":", only the part after the first one is used.
		$dummy = '';
		$dummy2 = '';
		$this->embracingTags($headPart,'TITLE',$contentArr['title'],$dummy2,$dummy);
		$titleParts = explode(':',$contentArr['title'],2);
		$contentArr['title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);

			// Get keywords and description metatags. Each call consumes one <meta tag from $headPart
			// and stores its attribute string in $meta[$i].
		$meta = array();
		for ($i=0; $this->embracingTags($headPart,'meta',$dummy,$headPart,$meta[$i]); $i++) { /*nothing*/ }
		for ($i=0; isset($meta[$i]); $i++) {
			$attribs = t3lib_div::get_tag_attributes($meta[$i]);
			$metaName = isset($attribs['name']) ? $attribs['name'] : '';
			$metaContent = isset($attribs['content']) ? $attribs['content'] : '';
			if (stristr($metaName,'keywords')) $contentArr['keywords'].=','.$metaContent;
			if (stristr($metaName,'description')) $contentArr['description'].=','.$metaContent;
		}

		$this->typoSearchTags($contentArr['body']);

			// Get rid of unwanted sections (ie. scripting and style stuff) in body
		foreach (explode(',',$this->excludeSections) as $tag) {
			while ($this->embracingTags($contentArr['body'],$tag,$dummy,$contentArr['body'],$dummy2));
		}

			// Remove tags, but first make sure we don't concatenate words by doing it
		$contentArr['body'] = str_replace('<',' <',$contentArr['body']);
		$contentArr['body'] = trim(strip_tags($contentArr['body']));

		$contentArr['keywords'] = trim($contentArr['keywords']);
		$contentArr['description'] = trim($contentArr['description']);

		return $contentArr;
	}

	/**
	 * Wraps plain (non-HTML) content in a content array: the content becomes the body,
	 * all other parts keep their defaults.
	 *
	 * @param	string		Content of the document
	 * @return	array		Copy of $this->defaultContentArray with "body" set
	 */
	function splitRegularContent($content) {
		$contentArr = $this->defaultContentArray;
		$contentArr['body'] = $content;

		return $contentArr;
	}

	/**
	 * Converts every part of a content array from a string into a list of words.
	 * Each part is entity-decoded (if html_entity_decode() exists), lowercased with
	 * strtolower_all() and split with split2words(). Duplicate words are removed from
	 * title, keywords and description (not from body).
	 *
	 * @param	array		Content array with string values
	 * @return	array		Content array with word-list values
	 */
	function procesWordsInArrays($contentArr) {
			// array_keys() is used because the elements are rewritten while iterating
		foreach (array_keys($contentArr) as $key) {
			if (function_exists('html_entity_decode')) $contentArr[$key] = html_entity_decode($contentArr[$key]);
			$contentArr[$key] = $this->strtolower_all($contentArr[$key]);
			$this->split2words($contentArr[$key]);
		}

			// For title, keywords, and description we don't want duplicates
		$contentArr['title'] = array_unique($contentArr['title']);
		$contentArr['keywords'] = array_unique($contentArr['keywords']);
		$contentArr['description'] = array_unique($contentArr['description']);

		return $contentArr;
	}

	/**
	 * Builds the short description stored with an indexed document from the start of its body text.
	 * The length comes from $this->conf['index_descrLgd'] (0-255, default 200); 0 disables the description.
	 *
	 * @param	array		Content array; only "body" is read
	 * @return	string		Description, or an empty string when disabled
	 */
	function bodyDescription($contentArr) {
			// Always defined, so a disabled description or a missing html_entity_decode() no longer yields an undefined variable
		$bodyDescription = '';
		$maxL = t3lib_div::intInRange($this->conf['index_descrLgd'],0,255,200);
		if ($maxL) {
			$bodyDescription = trim($contentArr['body']);
			if (function_exists('html_entity_decode')) $bodyDescription = html_entity_decode($bodyDescription);
				// Takes the double length first, because whitespace may be removed and thus shorten the string more yet.
			$bodyDescription = implode(' ',preg_split('/[[:space:],]+/',substr($bodyDescription,0,$maxL*2)));
			$bodyDescription = substr($bodyDescription,0,$maxL);
		}
		return $bodyDescription;
	}

	/**
	 * Extracts the hyperlinks from HTML content (using t3lib_htmlmail) and passes every link
	 * that points to an existing file to indexRegularDocument(). Links with a "jumpurl="
	 * parameter are resolved through getJumpurl() first.
	 *
	 * @param	string		HTML content
	 * @return	void
	 */
	function extractLinks($content) {
		$extract = t3lib_div::makeInstance('t3lib_htmlmail');
		$extract->extractHtmlInit($content,'');
		$extract->extractHyperLinks();

		if (is_array($extract->theParts['html']['hrefs'])) {
			foreach ($extract->theParts['html']['hrefs'] as $linkInfo) {
				$linkInfo['ref'] = t3lib_div::htmlspecialchars_decode($linkInfo['ref']);
				if (strstr($linkInfo['ref'],'?') && strstr($linkInfo['ref'],'jumpurl=')) {
					$qParts = parse_url($linkInfo['ref']);
					$theJumpurlFile = isset($qParts['query']) ? $this->getJumpurl($qParts['query']) : '';
					if ($theJumpurlFile && @is_file($theJumpurlFile)) {
						$this->indexRegularDocument($theJumpurlFile);
					}
				} elseif (@is_file($linkInfo['ref'])) {
					$this->indexRegularDocument($linkInfo['ref']);
				}
			}
		}
	}

	/**
	 * Returns the value of the "jumpurl" parameter of a query string.
	 * The query is parsed into a local array; the one-argument form of parse_str() used before
	 * imported every parameter as a local variable (and no longer exists in PHP 8).
	 *
	 * @param	string		Query string (without "?")
	 * @return	string		The jumpurl value, or an empty string if absent or not a plain string
	 */
	function getJumpurl($query) {
		$params = array();
		parse_str($query,$params);

		return (isset($params['jumpurl']) && is_string($params['jumpurl'])) ? $params['jumpurl'] : '';
	}

	/**
	 * Converts "Key: value" lines (the output lines of the pdfinfo tool) into an array.
	 * Lines without ":" or with an empty key are skipped.
	 *
	 * @param	array		Output lines
	 * @return	array		Values indexed by lowercased, trimmed key
	 */
	function splitPdfInfo($pdfInfoArray) {
		$res = array();
		if (is_array($pdfInfoArray)) {
			foreach ($pdfInfoArray as $line) {
				$parts = explode(':',$line,2);
				if (count($parts)>1 && trim($parts[0])) {
					$res[strtolower(trim($parts[0]))] = trim($parts[1]);
				}
			}
		}
		return $res;
	}

	/**
	 * Indexes an external file if its extension is supported. The file is split into parts by
	 * fileContentParts(); each part gets its own phash and is (re-)indexed when checkMtimeTstamp()
	 * says so and its content hash is not already known.
	 *
	 * @param	string		File name relative to PATH_site
	 * @return	void
	 */
	function indexRegularDocument($file) {
			// init
		$fI = pathinfo($file);
		$ext = isset($fI['extension']) ? strtolower($fI['extension']) : '';
		$absFile = PATH_site.$file;

		if (@is_file($absFile) && !empty($this->supportedExtensions[$ext])) {
			$mtime = filemtime($absFile);
			$cParts = $this->fileContentParts($ext,$absFile);
			foreach ($cParts as $cPKey) {
				$GLOBALS['TT']->push('Index: '.str_replace('.','_',basename($file)).($cPKey?'#'.$cPKey:''),'');
				$Pstart = t3lib_div::milliseconds();
				$subinfo=array('key'=>$cPKey);
				$phash_arr = $this->setExtHashes($file,$subinfo);

				$check = $this->checkMtimeTstamp($mtime, $this->tstamp_maxAge, $this->tstamp_minAge, $phash_arr['phash']);
				if ($check > 0) {
					$GLOBALS['TT']->setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1);
						// Divide into title,keywords,description and body:
					$GLOBALS['TT']->push('Split content','');
					$contentParts = $this->readFileContent($ext,$absFile,$cPKey);
					$GLOBALS['TT']->pull();
					if (is_array($contentParts)) {
							// Calculating a hash over what is to be the actual content. (see indexTypo3PageContent())
						$content_md5h = $this->md5inthash(implode('',$contentParts));

						if ($this->checkExternalDocContentHash($phash_arr['phash_grouping'], $content_md5h)) {
								// Splitting words
							$GLOBALS['TT']->push('Extract words from content','');
							$splitInWords = $this->procesWordsInArrays($contentParts);
							$GLOBALS['TT']->pull();

								// Analyse the indexed words.
							$GLOBALS['TT']->push('Analyse the extracted words','');
							$indexArr = $this->indexAnalyze($splitInWords);
							$GLOBALS['TT']->pull();

								// Submitting page (phash) record
							$GLOBALS['TT']->push('Submitting page','');
							$size=filesize($absFile);
							$ctime=filemtime($absFile);	// Unfortunately I cannot determine WHEN a file is originally made - so I must return the modification time...
$this->submitFilePage($phash_arr,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts);
							$GLOBALS['TT']->pull();

								// Check words and submit to word list if not there
							$GLOBALS['TT']->push('Check word list and submit words','');
							$this->checkWordList($indexArr);
							$this->submitWords($indexArr,$phash_arr['phash']);
							$GLOBALS['TT']->pull();

								// Set parsetime
							$this->updateParsetime($phash_arr['phash'],t3lib_div::milliseconds()-$Pstart);
						} else {
							$this->updateTstamp($phash_arr['phash'],$mtime);	// Update the timestamp
							$GLOBALS['TT']->setTSlogMessage('Indexing not needed, the contentHash, '.$content_md5h.', has not changed. Timestamp updated.');
						}
					} else {
						$GLOBALS['TT']->setTSlogMessage('Could not index file! Unsupported extension.');
					}
				} else {
					$GLOBALS['TT']->setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);
				}
					// Checking and setting sections:
				# $this->submitFile_grlist($phash_arr['phash']);	// Setting a gr_list record if there is none already (set for default fe_group)
				$this->submitFile_section($phash_arr['phash']);		// Setting a section-record for the file. This is done also if the file is not indexed. Notice that section records are deleted when the page is indexed.
				$GLOBALS['TT']->pull();
			}
		}
	}

	/**
	 * Reads the content of an external file and returns it as a content array.
	 * PDF files are converted with pdfinfo/pdftotext (only the page range given in $cPKey),
	 * Word files with catdoc; txt files are read as-is and html/htm files are passed through
	 * splitHTMLContent(). If no title was found, the file name (with "_" replaced by spaces) is used.
	 *
	 * @param	string		File extension, lowercase
	 * @param	string		Absolute file name
	 * @param	string		Content part key; for PDF files a page range "low-high"
	 * @return	mixed		Content array, or FALSE for an unhandled extension
	 */
	function readFileContent($ext,$absFile,$cPKey) {
			// Start from the defaults so all keys exist even when an external tool is missing or fails
		$contentArr = $this->defaultContentArray;

		switch ($ext) {
			case 'pdf':
				if (!empty($this->app['pdfinfo'])) {
						// Getting pdf-info. The file name is escaped, since it stems from links found in page content.
					$pdfInfoLines = array();
					exec($this->app['pdfinfo'].' '.escapeshellarg($absFile),$pdfInfoLines);
					$pdfInfo = $this->splitPdfInfo($pdfInfoLines);

					if (isset($pdfInfo['pages']) && intval($pdfInfo['pages'])) {
						$range = explode('-',$cPKey,2);
						$low = intval($range[0]);
						$high = isset($range[1]) ? intval($range[1]) : $low;

							// Get pdf content:
						$tempFileName = t3lib_div::tempnam('Typo3_indexer');	// Create temporary name
						@unlink($tempFileName);	// Delete if exists, just to be safe.
						$cmd = $this->app['pdftotext'].' -f '.$low.' -l '.$high.' -q '.escapeshellarg($absFile).' '.escapeshellarg($tempFileName);
						exec($cmd);

						$content = '';
						if (@is_file($tempFileName)) {
							$content = t3lib_div::getUrl($tempFileName);
							unlink($tempFileName);
						} else {
							$GLOBALS['TT']->setTSlogMessage('PDFtoText Failed on this document: '.$absFile.". Maybe the PDF file is locked for printing or encrypted.",2);
						}
						$contentArr = $this->splitRegularContent($content);
					}
				}
			break;
			case 'doc':
				if (!empty($this->app['catdoc'])) {
					$catdocLines = array();
					exec($this->app['catdoc'].' '.escapeshellarg($absFile),$catdocLines);
					$contentArr = $this->splitRegularContent(implode(chr(10),$catdocLines));
				}
			break;
			case 'txt':
				$contentArr = $this->splitRegularContent(t3lib_div::getUrl($absFile));
			break;
			case 'html':
			case 'htm':
				$contentArr = $this->splitHTMLContent(t3lib_div::getUrl($absFile));
			break;
			default:
				return false;
		}

			// If no title (and why should there be...) then the file-name is set as title. This will raise the hits considerably if the search matches the document name.
		if (!$contentArr['title']) {
			$contentArr['title'] = str_replace('_',' ',basename($absFile));	// Substituting "_" for " " because many filenames may have this instead of a space char.
		}
		return $contentArr;
	}

	/**
	 * Returns the list of content part keys a file is divided into when indexed.
	 * Only PDF files are divided: $this->pdf_mode > 0 means that many pages per part,
	 * otherwise abs($this->pdf_mode) is the number of parts (at least 1, at most the page count).
	 * All other files, and PDF files without a page count, yield the single key 0.
	 *
	 * @param	string		File extension, lowercase
	 * @param	string		Absolute file name
	 * @return	array		Part keys; page ranges "low-high" for PDF files
	 */
	function fileContentParts($ext,$absFile) {
		$cParts = array(0);
		switch ($ext) {
			case 'pdf':
					// Getting pdf-info:
				$pdfInfoLines = array();
				exec($this->app['pdfinfo'].' '.escapeshellarg($absFile),$pdfInfoLines);
				$pdfInfo = $this->splitPdfInfo($pdfInfoLines);
				$pages = isset($pdfInfo['pages']) ? intval($pdfInfo['pages']) : 0;

				if ($pages) {
					$cParts = array();
						// Calculate mode
					if ($this->pdf_mode>0) {
						$iter = ceil($pages/$this->pdf_mode);
					} else {
						$iter = t3lib_div::intInRange(abs($this->pdf_mode),1,$pages);
					}
					for ($a=0; $a<$iter; $a++) {
						$low = floor($a*($pages/$iter))+1;
						$high = floor(($a+1)*($pages/$iter));
						$cParts[] = $low.'-'.$high;
					}
				}
			break;
		}
		return $cParts;
	}

	/**
	 * Finds the first occurrence of a tag in a string (case-insensitive) and splits the string around it.
	 * When the closing tag exists, $stringAfter receives the input with the whole element cut out,
	 * i.e. the text before the start tag plus the text after the end tag. The text before used to be
	 * dropped, so removing a <script> section threw away all body content in front of it.
	 *
	 * @param	string		String to search in
	 * @param	string		Tag name, eg. "script"
	 * @param	string		Passed by reference: content between start and end tag (blank if there is no end tag)
	 * @param	string		Passed by reference: the remaining string; without an end tag this is everything after the start tag
	 * @param	string		Passed by reference: the text between the tag name and the ">" of the start tag
	 * @return	boolean		TRUE if the tag was found, otherwise FALSE (the reference parameters are then untouched)
	 */
	function embracingTags($string,$tagName,&$tagContent,&$stringAfter,&$paramList) {
		$endTag = '</'.$tagName.'>';
		$startTag = '<'.$tagName;

		$fromTag = stristr($string,$startTag);	// stristr used because we want a case-insensitive search for the tag.
		if ($fromTag === false) return false;	// if the tag was not found, return false

		$tagParts = explode('>',substr($fromTag,strlen($startTag)),2);
		$paramList = $tagParts[0];
		$afterStart = isset($tagParts[1]) ? $tagParts[1] : '';

		$fromEndTag = stristr($afterStart,$endTag);
		if ($fromEndTag !== false) {
			$stringBefore = substr($string,0,strlen($string)-strlen($fromTag));
			$tagContent = substr($afterStart,0,strlen($afterStart)-strlen($fromEndTag));
			$stringAfter = $stringBefore.substr($fromEndTag,strlen($endTag));
		} else {	// If there was no ending tag, the tagContent is blank and anything after the tag it self is returned.
			$tagContent = '';
			$stringAfter = $afterStart;
		}
		return true;
	}

	/**
	 * Builds the word index for a document from its word lists.
	 *
	 * @param	array		Content array with word lists (as returned by procesWordsInArrays())
	 * @return	array		Index array, keyed by word
	 */
	function indexAnalyze($content) {
		$indexArr = array();

		$this->analyzeHeaderinfo($indexArr,$content,'title',7);
		$this->analyzeHeaderinfo($indexArr,$content,'keywords',6);
		$this->analyzeHeaderinfo($indexArr,$content,'description',5);
		$this->analyzeBody($indexArr,$content);

		return $indexArr;
	}

	/**
	 * Registers the words of one header part (title, keywords or description) in the index array.
	 * For each word bit number $offset is set in "cmp", "count" is increased and "hash" and
	 * "metaphone" are calculated.
	 *
	 * @param	array		Index array, passed by reference
	 * @param	array		Content array with word lists
	 * @param	string		Key of the part in $content
	 * @param	integer		Bit number to set in "cmp"
	 * @return	void
	 */
	function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {
		foreach ($content[$key] as $val) {
			$val = substr($val,0,30);	// Max 30 - because the baseword varchar IS 30. This MUST be the same.
			$prevCmp = isset($retArr[$val]['cmp']) ? $retArr[$val]['cmp'] : 0;
			$prevCount = isset($retArr[$val]['count']) ? $retArr[$val]['count'] : 0;
			$retArr[$val]['cmp'] = $prevCmp|pow(2,$offset);
			$retArr[$val]['count'] = $prevCount+1;
			$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
			$retArr[$val]['metaphone'] = $this->metaphone($val);
			$this->wordcount++;
		}
	}

	/**
	 * Registers the body words in the index array. For a word not seen before, its position
	 * ("first"), "hash" and "metaphone" are stored; "count" is increased for every occurrence.
	 *
	 * @param	array		Index array, passed by reference
	 * @param	array		Content array with word lists; only "body" is read
	 * @return	void
	 */
	function analyzeBody(&$retArr,$content) {
		foreach ($content['body'] as $key => $val) {
			$val = substr($val,0,30);	// Max 30 - because the baseword varchar IS 30. This MUST be the same.
if(!isset($retArr[$val])) {
				$retArr[$val]['first']=$key;
				$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
				$retArr[$val]['metaphone'] = $this->metaphone($val);
			}
			$retArr[$val]['count'] = $retArr[$val]['count']+1;
			$this->wordcount++;
		}
	}

	/**
	 * Reduces the body to the sections marked with <!--TYPO3SEARCH_begin--> / <!--TYPO3SEARCH_end-->
	 * comments. If the body contains no such marker it is left untouched.
	 *
	 * @param	string		HTML body, passed by reference
	 * @return	boolean		TRUE if markers were found and $body was rewritten, otherwise FALSE
	 */
	function typoSearchTags(&$body) {
		$expBody = explode('<!--TYPO3SEARCH_',$body);
		if (count($expBody)>1) {
			$body = '';
			$prev = '';	// Text before the first marker; only added when an "end" marker follows it
			foreach ($expBody as $val) {
				$part = explode('-->',$val,2);
				if (trim($part[0])=='begin') {
					$body .= isset($part[1]) ? $part[1] : '';
					$prev = '';
				} elseif (trim($part[0])=='end') {
					$body .= $prev;
				} else {
					$prev = $val;
				}
			}
			return true;
		}
		return false;
	}













	/**********************************
	 *
	 * Words
	 *
	 **********************************/

	/**
	 * Splits a string into words. The string is divided at whitespace and commas; from both ends
	 * of each piece all characters that are neither alphanumeric nor listed in $this->convChars
	 * are removed, and only pieces accepted by wordOK() are kept.
	 * preg_split()/preg_replace() replace split()/ereg_replace(), which were removed in PHP 7.
	 *
	 * @param	string		String to split, passed by reference; replaced by the array of words
	 * @return	void
	 */
	function split2words(&$string) {
		$words = preg_split('/[[:space:],]+/',$string);
		$reg = '[^[:alnum:]'.$this->convChars[0].$this->convChars[1].']*';

		$matches = array();
		foreach ($words as $w) {
			$w = trim($w);
			$w = preg_replace('/^'.$reg.'/','',$w);
			$w = preg_replace('/'.$reg.'$/D','',$w);
			if ($this->wordOK($w)) {$matches[]=$w;}
		}
		$string = $matches;
	}

	/**
	 * Checks whether a word is acceptable for indexing: it must be 2-49 bytes long and less than
	 * 30% of its bytes may be characters that rawurlencode() has to encode.
	 *
	 * @param	string		The word
	 * @return	integer		1 if the word is OK, otherwise 0
	 */
	function wordOK($w) {
		$length = strlen($w);
		if (!$w || $length<=1 || $length>=50) {
			return 0;
		}

		$encoded = rawurlencode($w);
		if ($encoded === (string)$w) {
			return 1;
		}

		$fChars = substr_count($encoded,'%');	// Number of encoded ("strange") bytes
		$rel = round($fChars/$length*100);
		return $rel<30 ? 1 : 0;	// Max 30% strange chars!
	}

	/**
	 * Returns an integer hash of the metaphone key of a word.
	 *
	 * @param	string		The word
	 * @return	integer		0 if PHP's metaphone() returns an empty key, otherwise the first 7 hex digits of the md5 of the key as integer
	 */
	function metaphone($word) {
		$tmp = metaphone($word);
		if ($tmp=='') $ret=0; else $ret=hexdec(substr(md5($tmp),0,7));
		return $ret;
	}

	/**
	 * Converts a string to lowercase, including the special characters in $this->convChars.
	 * NOTE(review): strtr() maps byte by byte, so this presumably expects a single-byte charset - confirm.
	 *
	 * @param	string		Input string
	 * @return	string		Lowercased string
	 */
	function strtolower_all($str) {
		return strtolower(strtr($str, $this->convChars[0], $this->convChars[1]));
	}















	/********************************
	 *
	 * SQL Helper functions
	 *
	 *******************************/

	/**
	 * Maps a frequency value with the factor freqMax*100*freqRange: values below 1 are
	 * multiplied by the factor and capped at freqRange, all other values are divided by it.
	 *
	 * @param	double		Frequency
	 * @return	double		Mapped frequency
	 */
	function freqMap($freq) {
		$mapFactor = $this->freqMax*100*$this->freqRange;
		if ($freq<1) {
			$newFreq = $freq*$mapFactor;
			$newFreq = $newFreq>$this->freqRange?$this->freqRange:$newFreq;
		} else {
			$newFreq = $freq/$mapFactor;
		}
		return $newFreq;
	}

	/**
	 * Adds the root line fields to a field array: rl0, rl1 and rl2 get the uid of root line
	 * level 0-2, plus one field per entry (field name => level) configured in
	 * $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['addRootLineFields'].
	 *
	 * @param	array		Field array, passed by reference
	 * @return	void
	 */
	function getRootLineFields(&$fieldArr) {
		$rl = $this->rootLine;

		$fieldArr['rl0'] = intval($rl[0]['uid']);
		$fieldArr['rl1'] = intval($rl[1]['uid']);
		$fieldArr['rl2'] = intval($rl[2]['uid']);

		if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) {
			foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) {
				$fieldArr[$fieldName] = intval($rl[$rootLineLevel]['uid']);
			}
		}
	}














	/********************************
	 *
	 * SQL Helper functions
	 *
	 *******************************/

	/**
	 * Removes the index records of one or more phash values from index_phash, index_rel,
	 * index_section, index_fulltext and index_grlist. Optionally the cache_pages rows of the
	 * pages found in index_section for the phash are deleted first.
	 *
	 * @param	string		Comma list of phash integers
	 * @param	boolean		If set, the page cache of the related pages is cleared
	 * @return	void
	 */
	function removeIndexedPhashRow($phashList,$clearPageCache=1) {
		$phashRows=t3lib_div::trimExplode(',',$phashList,1);
		foreach ($phashRows as $phash) {
			$phash = intval($phash);
			if ($phash>0) {

				if ($clearPageCache) {
						// Clearing page cache:
					$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.intval($phash));
					if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
						$idList = array();
						while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
							$idList[] = $row['page_id'];
						}
						$GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($idList)).')');
					}
				}

					// Removing old registrations for all tables.
				$tableArr = explode(',','index_phash,index_rel,index_section,index_fulltext,index_grlist');
				foreach($tableArr as $table) {
					$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash="'.$GLOBALS['TYPO3_DB']->quoteStr($phash, $table).'"');
				}

					// Did not remove any index_section records for external files where phash_t3 points to this hash!
			}
		}
	}

	/**
	 * Decides from the index_phash record of a document whether it has to be (re-)indexed.
	 *
	 * @param	integer		Modification time of the document
	 * @param	integer		Maximum age in seconds of an index record; 0 disables the check
	 * @param	integer		Minimum age in seconds before a document is considered again; 0 disables the check
	 * @param	integer		phash of the document
	 * @return	integer		Result code, a key of $this->reasons: positive means the document must be indexed, negative that it must not
	 */
	function checkMtimeTstamp($mtime,$maxAge,$minAge,$phash) {

			// Select indexed page:
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
		$out = 0;

			// If there was an indexing of the page...:
		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
			if ($maxAge && ($row['tstamp']+$maxAge)<time()) {	// If max age is exceeded, index the page
				$out = 1;
			} else {
				if (!$minAge || ($row['tstamp']+$minAge)<time()) {	// if minAge is not set or if minAge is exceeded, consider at mtime
					if ($mtime) {	// It mtime is set, then it's tested. If not, the page must clearly be indexed.
						if ($row['item_mtime'] != $mtime) {	// And if mtime is different from the index_phash mtime, it's about time to re-index.
01097 $out = 2; 01098 } else { 01099 $out = -1; 01100 $this->updateTstamp($phash); // Update the timestatmp 01101 $GLOBALS['TT']->setTSlogMessage('Mtime matched, timestamp updated.',1); 01102 } 01103 } else {$out = 3; } 01104 } else {$out = -2;} 01105 } 01106 } else {$out = 4;} // No indexing found. 01107 return $out; 01108 } 01109 01117 function update_grlist($phash,$phash_x) { 01118 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->pObj->gr_list)); 01119 if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { 01120 $this->submit_grlist($phash,$phash_x); 01121 $GLOBALS['TT']->setTSlogMessage("Inserted gr_list '".$this->pObj->gr_list."' for phash '".$phash."'",1); 01122 } 01123 } 01124 01129 function is_grlist_set($phash_x) { 01130 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x)); 01131 return $GLOBALS['TYPO3_DB']->sql_num_rows($res); 01132 } 01133 01140 function checkContentHash() { 01141 // With this query the page will only be indexed if it's content is different from the same "phash_grouping" -page. 01142 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash AS A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h)); 01143 if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01144 return $row; 01145 } 01146 return 1; 01147 } 01148 01154 function removeLoginpagesWithContentHash() { 01155 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash AS A,index_grlist AS B', ' 01156 A.phash=B.phash 01157 AND A.phash_grouping='.intval($this->hash['phash_grouping']).' 01158 AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).' 
01159 AND A.contentHash='.intval($this->content_md5h)); 01160 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01161 $GLOBALS['TT']->setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1); 01162 $this->removeOldIndexedPages($row['phash']); 01163 } 01164 } 01165 01172 function removeOldIndexedPages($phash) { 01173 // Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here. 01174 $tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext'); 01175 foreach($tableArr as $table) { 01176 $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash="'.$GLOBALS['TYPO3_DB']->quoteStr($phash, $table).'"'); 01177 } 01178 // Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file). 01179 $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3="'.$GLOBALS['TYPO3_DB']->quoteStr($phash, 'index_section').'"'); 01180 } 01181 01190 function checkExternalDocContentHash($hashGr,$content_md5h) { 01191 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash AS A', 'A.phash_grouping='.intval($hashGr).' 
AND A.contentHash='.intval($content_md5h)); 01192 if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01193 return 0; 01194 } 01195 return 1; 01196 } 01197 01205 function updateTstamp($phash,$mtime=0) { 01206 $updateFields = array( 01207 'tstamp' => time() 01208 ); 01209 if ($mtime) { $updateFields['item_mtime'] = intval($mtime); } 01210 01211 $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); 01212 } 01213 01221 function updateParsetime($phash,$parsetime) { 01222 $updateFields = array( 01223 'parsetime' => intval($parsetime) 01224 ); 01225 01226 $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); 01227 } 01228 01234 function updateRootline() { 01235 01236 $updateFields = array(); 01237 $this->getRootLineFields($updateFields); 01238 01239 $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->pObj->id), $updateFields); 01240 } 01241 01242 01243 01244 01245 01246 01247 01248 01249 01250 01251 01252 01253 /******************************** 01254 * 01255 * SQL; Inserting in database 01256 * 01257 *******************************/ 01258 01264 function submitPage() { 01265 $this->removeOldIndexedPages($this->hash['phash']); 01266 01267 // setting new 01268 $fields = array( 01269 'phash' => $this->hash['phash'], 01270 'phash_grouping' => $this->hash['phash_grouping'], 01271 'cHashParams' => serialize($this->cHashParams), 01272 'contentHash' => $this->content_md5h, 01273 'data_page_id' => $this->pObj->id, 01274 'data_page_reg1' => $this->pObj->page_cache_reg1, 01275 'data_page_type' => $this->pObj->type, 01276 'data_page_mp' => $this->pObj->MP, 01277 'gr_list' => $this->pObj->gr_list, 01278 'item_type' => 0, // TYPO3 page 01279 'item_title' => $this->contentParts['title'], 01280 'item_description' => $this->bodyDescription($this->contentParts), 01281 'item_mtime' => $this->mtime, 01282 'item_size' => strlen($this->pObj->content), 01283 'tstamp' => time(), 
01284 'crdate' => time(), 01285 'item_crdate' => $this->pObj->page['crdate'], // Creation date of page 01286 'sys_language_uid' => $this->pObj->sys_language_uid // Sys language uid of the page. Should reflect which language it DOES actually display! 01287 ); 01288 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); 01289 01290 // ************************ 01291 // PROCESSING index_section 01292 // ************************ 01293 $this->submit_section($this->hash['phash'],$this->hash['phash']); 01294 01295 // ************************ 01296 // PROCESSING index_grlist 01297 // ************************ 01298 $this->submit_grlist($this->hash['phash'],$this->hash['phash']); 01299 01300 // ************************ 01301 // PROCESSING index_fulltext 01302 // ************************ 01303 $fields = array( 01304 'phash' => $this->hash['phash'], 01305 'fulltextdata' => implode($this->contentParts,' ') 01306 ); 01307 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); 01308 } 01309 01317 function submit_grlist($hash,$phash_x) { 01318 // Setting the gr_list record 01319 $fields = array( 01320 'phash' => $hash, 01321 'phash_x' => $phash_x, 01322 'hash_gr_list' => $this->md5inthash($this->pObj->gr_list), 01323 'gr_list' => $this->pObj->gr_list 01324 ); 01325 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields); 01326 } 01327 01335 function submit_section($hash,$hash_t3) { 01336 $fields = array( 01337 'phash' => $hash, 01338 'phash_t3' => $hash_t3, 01339 'page_id' => intval($this->pObj->id) 01340 ); 01341 01342 $this->getRootLineFields($fields); 01343 01344 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields); 01345 } 01346 01361 function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts) { 01362 // Removing old registrations for tables. 
01363 $tableArr = explode(',','index_phash,index_fulltext,index_grlist'); 01364 foreach($tableArr as $table) { 01365 $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash="'.$GLOBALS['TYPO3_DB']->quoteStr($hash['phash'], $table).'"'); 01366 } 01367 // setting new 01368 $fields = array( 01369 'phash' => $hash['phash'], 01370 'phash_grouping' => $hash['phash_grouping'], 01371 'cHashParams' => serialize($subinfo), 01372 'contentHash' => $content_md5h, 01373 'data_filename' => $file, 01374 'item_type' => intval($this->Itypes[$ext]) ? intval($this->Itypes[$ext]) : -1, 01375 'item_title' => trim($contentParts['title']) ? $contentParts['title'] : basename($file), 01376 'item_description' => $this->bodyDescription($contentParts), 01377 'item_mtime' => $mtime, 01378 'item_size' => $size, 01379 'item_crdate' => $ctime, 01380 'tstamp' => time(), 01381 'crdate' => time(), 01382 'gr_list' => $this->pObj->gr_list 01383 ); 01384 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); 01385 01386 // ************************ 01387 // PROCESSING index_fulltext 01388 // ************************ 01389 $fields = array( 01390 'phash' => $hash['phash'], 01391 'fulltextdata' => implode($contentParts,' ') 01392 ); 01393 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); 01394 } 01395 01402 function submitFile_grlist($hash) { 01403 // ************************ 01404 // PROCESSING index_grlist 01405 // ************************ 01406 // Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one. 01407 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' 
OR hash_gr_list='.$this->md5inthash($this->pObj->gr_list).')'); 01408 if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { 01409 $this->submit_grlist($hash,$hash); 01410 } 01411 } 01412 01419 function submitFile_section($hash) { 01420 // ************************ 01421 // PROCESSING index_grlist 01422 // ************************ 01423 // Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one. 01424 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->pObj->id)); 01425 if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { 01426 $this->submit_section($hash,$this->hash['phash']); 01427 } 01428 } 01429 01436 function checkWordList($wl) { 01437 reset($wl); 01438 $phashArr=array(); 01439 while(list($key,)=each($wl)) { 01440 $phashArr[] = $wl[$key]['hash']; 01441 } 01442 if (count($phashArr)) { 01443 $cwl = implode(',',$phashArr); 01444 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')'); 01445 01446 if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) { 01447 $GLOBALS['TT']->setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1); 01448 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01449 unset($wl[$row['baseword']]); 01450 } 01451 01452 reset($wl); 01453 while(list($key,$val)=each($wl)) { 01454 $insertFields = array( 01455 'wid' => $val['hash'], 01456 'baseword' => $key, 01457 'metaphone' => $val['metaphone'] 01458 ); 01459 // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 30 chars (the baseword varchar is 30 characters...) this is not a problem. 
01460 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields); 01461 } 01462 } 01463 } 01464 } 01465 01473 function submitWords($wl,$phash) { 01474 $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash="'.$GLOBALS['TYPO3_DB']->quoteStr($phash, 'index_rel').'"'); 01475 01476 foreach($wl as $val) { 01477 $insertFields = array( 01478 'phash' => $phash, 01479 'wid' => $val['hash'], 01480 'count' => $val['count'], 01481 'first' => $val['first'], 01482 'freq' => $this->freqMap(($val['count']/$this->wordcount)), 01483 'flags' => $val['cmp'] 01484 ); 01485 01486 $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields); 01487 } 01488 } 01489 01490 01491 01492 01493 01494 01495 01496 01497 01498 01499 01500 01501 01502 01503 01504 01505 01506 /******************************** 01507 * 01508 * Hashing 01509 * 01510 *******************************/ 01511 01517 function setT3Hashes() { 01518 // Set main array: 01519 $hArray = array( 01520 'id' => $this->pObj->id, 01521 'type' => $this->pObj->type, 01522 'sys_lang' => $this->pObj->sys_language_uid, 01523 'MP' => $this->pObj->MP, 01524 'cHash' => $this->cHashParams 01525 ); 01526 // Set grouping hash: 01527 $this->hash['phash_grouping'] = $this->md5inthash(serialize($hArray)); 01528 // Add gr_list and set plain phash 01529 $hArray['gr_list']=$this->pObj->gr_list; 01530 $this->hash['phash'] = $this->md5inthash(serialize($hArray)); 01531 } 01532 01540 function setExtHashes($file,$subinfo=array()) { 01541 // Set main array: 01542 $hash = array(); 01543 $hArray = array( 01544 'file' => $file, 01545 ); 01546 01547 // Set grouping hash: 01548 $hash['phash_grouping'] = $this->md5inthash(serialize($hArray)); 01549 01550 // Add subinfo 01551 $hArray['subinfo'] = $subinfo; 01552 $hash['phash'] = $this->md5inthash(serialize($hArray)); 01553 01554 return $hash; 01555 } 01556 01563 function md5inthash($str) { 01564 // Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they 
does not interfere with UNSIGNED integers or PHP-versions which has varying output from the hexdec function. 01565 // NOTICE: This must be changed a number of other places as well! 01566 return hexdec(substr(md5($str),0,7)); 01567 } 01568 } 01569 01570 01571 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php']) { 01572 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php']); 01573 } 01574 ?>