00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00128 require_once(PATH_t3lib.'class.t3lib_parsehtml.php');
00129
00130
00138 class tx_indexedsearch_indexer {
00139
00140
/**
 * Human-readable explanations for the codes returned by checkMtimeTstamp().
 * Positive codes mean "index the document", negative codes mean "skip it".
 */
var $reasons = array(
    -1 => 'mtime matched the document, so no changes detected and no content updated',
    -2 => 'The minimum age was not exceeded',
    1 => "The configured max-age was exceeded for the document and thus it's indexed.",
    2 => 'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
    3 => 'The minimum age was exceed, but mtime was not set, so the page was indexed.',
    4 => 'Page has never been indexed (is not represented in the index_phash table).'
);

// Comma-separated tag names; splitHTMLContent() removes these elements from the body before indexing.
var $excludeSections = 'script,style';

// Parser objects keyed by file extension; filled by initializeExternalParsers().
var $external_parsers = array();

// Group list compared against in submitFile_grlist().
var $defaultGrList = '0,-1';

// Age limits in seconds and the external file limit; all three are set in init() from the extension configuration.
var $tstamp_maxAge = 0;
var $tstamp_minAge = 0;
var $maxExternalFiles = 0;

// Both flags are switched on in hook_indexContent() when the crawler requests re-indexing.
var $forceIndexing = FALSE;
var $crawlerActive = FALSE;

// Template for the content parts array used by splitHTMLContent() and splitRegularContent().
var $defaultContentArray=array(
    'title' => '',
    'description' => '',
    'keywords' => '',
    'body' => '',
);
// Incremented once per word by analyzeHeaderinfo() and analyzeBody().
var $wordcount = 0;
// Number of external files indexed so far; compared to $maxExternalFiles in indexRegularDocument().
var $externalFileCounter = 0;

// Per-document configuration, filled by hook_indexContent() or backend_initIndexer().
var $conf = array();
// Unserialized extension configuration of indexed_search (see init()).
var $indexerConfig = array();
// Read as $hash['phash'] and $hash['phash_grouping']; presumably filled by setT3Hashes() - TODO confirm.
var $hash = array();
// Hashes of the file part currently processed by indexRegularDocument().
var $file_phash_arr = array();
// Content parts (title, description, keywords, body) of the page being indexed.
var $contentParts = array();
// Integer hash of the imploded content parts (see indexTypo3PageContent()).
var $content_md5h = '';
// Reset per file part in indexRegularDocument(); stored in the debug record when debugMode is on.
var $internal_log = array();
// Content fetched most recently by indexExternalUrl().
var $indexExternalUrl_content = '';

// cHash parameters of the page, prepared in init() and stored with the page record.
var $cHashParams = array();

// NOTE(review): not referenced in the visible part of the class; presumably word frequency settings - confirm.
var $freqRange = 32000;
var $freqMax = 0.1;

// Helper objects created in init(): charset conversion, optional metaphone implementation, and lexer.
var $csObj;
var $metaphoneObj;
var $lexerObj;
00195
00196
00197
/**
 * Frontend hook: indexes the page that has just been rendered.
 *
 * Copies page id, type, language, mount point, group list, cHash data, rootline uids and the
 * rendered content from $pObj into $this->conf and then runs init() and indexTypo3PageContent().
 * Nothing happens unless "index_enable" is set in the page configuration. Indexing is skipped
 * (with a log message) when frontend indexing is disabled and the crawler is not active, when
 * the page has the "no_search" flag, or when the page was rendered with "no_cache".
 *
 * @param object $pObj Frontend page object of the current request (passed by reference)
 * @return void
 */
function hook_indexContent(&$pObj) {

    // Extension configuration of indexed_search
    $indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);

    // The crawler may request a forced re-index through its processing instructions.
    if (t3lib_extMgm::isLoaded('crawler') && $pObj->applicationData['tx_crawler']['running']) {
        $procInstructions = $pObj->applicationData['tx_crawler']['parameters']['procInstructions'];

        // in_array() must only see an array; the instructions may be missing altogether.
        if (is_array($procInstructions) && in_array('tx_indexedsearch_reindex', $procInstructions)) {
            $pObj->applicationData['tx_crawler']['log'][] = 'Forced Re-indexing enabled';

            $this->crawlerActive = TRUE;
            $this->forceIndexing = TRUE;
        }
    }

    if (!$pObj->config['config']['index_enable']) {
        return;
    }

    $this->log_push('Index page','');

    if ($indexerConfig['disableFrontendIndexing'] && !$this->crawlerActive) {
        $this->log_setTSlogMessage('Index page? No, Ordinary Frontend indexing during rendering is disabled.');
    } elseif ($pObj->page['no_search']) {
        $this->log_setTSlogMessage('Index page? No, The "No Search" flag has been set in the page header!');
    } elseif ($pObj->no_cache) {
        $this->log_setTSlogMessage('Index page? No, page was set to "no_cache" and so cannot be indexed.');
    } else {
        $this->conf = array();

        // Identification of the page
        $this->conf['id'] = $pObj->id;
        $this->conf['type'] = $pObj->type;
        $this->conf['sys_language_uid'] = $pObj->sys_language_uid;
        $this->conf['MP'] = $pObj->MP;
        $this->conf['gr_list'] = $pObj->gr_list;

        // cHash and the parameters it was calculated from
        $this->conf['cHash'] = $pObj->cHash;
        $this->conf['cHash_array'] = $pObj->cHash_array;

        $this->conf['crdate'] = $pObj->page['crdate'];
        $this->conf['page_cache_reg1'] = $pObj->page_cache_reg1;

        // Uids of the rootline, keyed by rootline level
        $this->conf['rootline_uids'] = array();
        foreach($pObj->config['rootLine'] as $rlkey => $rldat) {
            $this->conf['rootline_uids'][$rlkey] = $rldat['uid'];
        }

        // Rendered content and its meta data
        $this->conf['content'] = $pObj->content;
        $this->conf['indexedDocTitle'] = $pObj->convOutputCharset($pObj->indexedDocTitle);
        $this->conf['metaCharset'] = $pObj->metaCharset;
        $this->conf['mtime'] = $pObj->register['SYS_LASTCHANGED'];

        // Settings from the page configuration
        $this->conf['index_externals'] = $pObj->config['config']['index_externals'];
        $this->conf['index_descrLgd'] = $pObj->config['config']['index_descrLgd'];

        // No record / free index configuration is involved in frontend indexing
        $this->conf['recordUid'] = 0;
        $this->conf['freeIndexUid'] = 0;

        $this->init();
        $this->indexTypo3PageContent();
    }

    // Paired with the log_push() above; only reached when that push actually happened.
    $this->log_pull();
}
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
/**
 * Prepares the indexer for use outside of frontend rendering.
 *
 * Resets $this->conf, fills it from the arguments plus fixed defaults (group list "0,-1",
 * external indexing on, description length 200) and then calls init().
 *
 * @param mixed $id Page id stored as conf[id]
 * @param mixed $type Page type stored as conf[type]
 * @param mixed $sys_language_uid Language uid stored as conf[sys_language_uid]
 * @param mixed $MP Mount point value stored as conf[MP]
 * @param mixed $uidRL Rootline uids stored as conf[rootline_uids]
 * @param array $cHash_array Parameters stored as conf[cHash_array]
 * @param boolean $createCHash If set, conf[cHash] is computed with makeCHash($cHash_array), otherwise it is empty
 * @return void
 */
function backend_initIndexer($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE) {

    // Start from scratch with the page identification
    $this->conf = array(
        'id' => $id,
        'type' => $type,
        'sys_language_uid' => $sys_language_uid,
        'MP' => $MP,
        'gr_list' => '0,-1',
    );

    // cHash handling
    if ($createCHash) {
        $this->conf['cHash'] = $this->makeCHash($cHash_array);
    } else {
        $this->conf['cHash'] = '';
    }
    $this->conf['cHash_array'] = $cHash_array;

    // Remaining defaults
    $this->conf['freeIndexUid'] = 0;
    $this->conf['page_cache_reg1'] = '';
    $this->conf['rootline_uids'] = $uidRL;
    $this->conf['index_externals'] = 1;
    $this->conf['index_descrLgd'] = 200;

    $this->init();
}
00333
/**
 * Stores the uid of a free index configuration in the indexer configuration.
 * The value is written to the index_phash record as "freeIndexUid" by submitPage() / submitFilePage().
 *
 * @param mixed $freeIndexUid Value to store as conf[freeIndexUid]
 * @return void
 */
function backend_setFreeIndexUid($freeIndexUid) {
    $this->conf['freeIndexUid'] = $freeIndexUid;
}
00343
/**
 * Indexes arbitrary content as if it were a rendered TYPO3 page.
 *
 * The title, keywords, description and content are HTML-escaped and wrapped into a minimal
 * HTML document which is then passed through indexTypo3PageContent().
 * The indexer configuration must already be in place (see backend_initIndexer()).
 *
 * @param string $title Document title
 * @param string $keywords Value for the "keywords" meta tag
 * @param string $description Value for the "description" meta tag
 * @param string $content Body text
 * @param string $charset Charset of the strings above, stored as conf[metaCharset]
 * @param mixed $mtime Modification time stored as conf[mtime]
 * @param mixed $crdate Creation time stored as conf[crdate]
 * @param mixed $recordUid Record uid stored as conf[recordUid]
 * @return void
 */
function backend_indexAsTYPO3Page($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0) {

    // Time stamps and record reference
    $this->conf['mtime'] = $mtime;
    $this->conf['crdate'] = $crdate;
    $this->conf['recordUid'] = $recordUid;

    // Escape every piece before it is placed into the markup
    $safeTitle = htmlspecialchars($title);
    $safeKeywords = htmlspecialchars($keywords);
    $safeDescription = htmlspecialchars($description);
    $safeBody = htmlspecialchars($content);

    // Minimal HTML document wrapping the content
    $this->conf['content'] = '
<html>
<head>
<title>'.$safeTitle.'</title>
<meta name="keywords" content="'.$safeKeywords.'" />
<meta name="description" content="'.$safeDescription.'" />
</head>
<body>
'.$safeBody.'
</body>
</html>';

    // Charset of the content; no separate document title is used
    $this->conf['metaCharset'] = $charset;
    $this->conf['indexedDocTitle'] = '';

    $this->indexTypo3PageContent();
}
00384
00385
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401
00402
/**
 * Initializes the indexer from $this->conf and the global configuration.
 *
 * Prepares the cHash parameters, calls setT3Hashes(), reads the extension configuration
 * (age limits, external file limit, flag bit mask), optionally sets up the external parsers
 * and creates the lexer, the optional metaphone object and the charset conversion object.
 *
 * @return void
 */
function init() {
    global $TYPO3_CONF_VARS;

    // cHash parameters: add the cHash itself and drop the encryption key
    $this->cHashParams = $this->conf['cHash_array'];
    if (is_array($this->cHashParams) && count($this->cHashParams)) {
        if ($this->conf['cHash']) {
            $this->cHashParams['cHash'] = $this->conf['cHash'];
        }
        unset($this->cHashParams['encryptionKey']);
    }

    // Hashes identifying the page
    $this->setT3Hashes();

    // Extension configuration; minAge / maxAge are multiplied by 3600 before being range-checked
    $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
    $this->tstamp_minAge = t3lib_div::intInRange($this->indexerConfig['minAge']*3600,0);
    $this->tstamp_maxAge = t3lib_div::intInRange($this->indexerConfig['maxAge']*3600,0);
    $this->maxExternalFiles = t3lib_div::intInRange($this->indexerConfig['maxExternalFiles'],0,1000,5);
    $this->flagBitMask = t3lib_div::intInRange($this->indexerConfig['flagBitMask'],0,255);

    // External parsers are only needed when external files are indexed
    if ($this->conf['index_externals']) {
        $this->initializeExternalParsers();
    }

    // Lexer: the configured one, or the one shipped with the extension
    if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer']) {
        $lexerObjRef = $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer'];
    } else {
        $lexerObjRef = 'EXT:indexed_search/class.lexer.php:&tx_indexedsearch_lexer';
    }
    $this->lexerObj = &t3lib_div::getUserObj($lexerObjRef);
    $this->lexerObj->debug = $this->indexerConfig['debugMode'];

    // Optional metaphone object; it gets a back-reference to this indexer
    if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
        $this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
        $this->metaphoneObj->pObj = &$this;
    }

    // Charset conversion object
    $this->csObj = &t3lib_div::makeInstance('t3lib_cs');
}
00452
/**
 * Creates the external parser objects configured in
 * $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'].
 *
 * Each object is stored in $this->external_parsers under its file extension and gets a
 * back-reference to this indexer. A parser whose initParser() returns a false value is removed again.
 *
 * @return void
 */
function initializeExternalParsers() {
    global $TYPO3_CONF_VARS;

    $parserRefs = $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'];
    if (!is_array($parserRefs)) {
        return;
    }

    foreach ($parserRefs as $extension => $objRef) {
        $this->external_parsers[$extension] = &t3lib_div::getUserObj($objRef);
        $this->external_parsers[$extension]->pObj = &$this;

        // Keep the parser only if it reports that it can handle the extension
        $usable = $this->external_parsers[$extension]->initParser($extension);
        if (!$usable) {
            unset($this->external_parsers[$extension]);
        }
    }
}
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
00494
00495
/**
 * Indexes the page content found in $this->conf['content'].
 *
 * Indexing starts when checkMtimeTstamp() returns a positive code, when no group list record
 * exists for the page hash, or when indexing is forced. The content is split into parts and
 * hashed; if a record with the same content hash already exists (and the max-age code 1 was not
 * returned), only timestamp, group list and rootline are updated. Otherwise the content is
 * converted to utf-8, split into words, analysed and written to the index, and linked files are
 * indexed if conf[index_externals] is set.
 *
 * @return void
 */
function indexTypo3PageContent() {

    $check = $this->checkMtimeTstamp($this->conf['mtime'], $this->hash['phash']);
    $is_grlist = $this->is_grlist_set($this->hash['phash']);

    if ($check > 0 || !$is_grlist || $this->forceIndexing) {

        // Log the reason for indexing
        if ($this->forceIndexing) {
            $this->log_setTSlogMessage('Indexing needed, reason: Forced',1);
        } elseif ($check > 0) {
            $this->log_setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1);
        } else {
            $this->log_setTSlogMessage('Indexing needed, reason: Updates gr_list!',1);
        }

        // Divide the HTML into title, keywords, description and body
        $this->log_push('Split content','');
        $this->contentParts = $this->splitHTMLContent($this->conf['content']);
        if ($this->conf['indexedDocTitle']) {
            $this->contentParts['title'] = $this->conf['indexedDocTitle'];
        }
        $this->log_pull();

        // Content hash over all parts; implode() takes the separator first
        // (the reversed legacy argument order is no longer accepted by current PHP versions).
        $this->content_md5h = $this->md5inthash(implode('', $this->contentParts));

        // Look for an existing record with the same content. Code 1 (max-age exceeded)
        // always leads to a full re-index.
        $checkCHash = $this->checkContentHash();
        if (!is_array($checkCHash) || $check===1) {
            $Pstart=t3lib_div::milliseconds();

            $this->log_push('Converting charset of content ('.$this->conf['metaCharset'].') to utf-8','');
            $this->charsetEntity2utf8($this->contentParts,$this->conf['metaCharset']);
            $this->log_pull();

            // Split the parts into words
            $this->log_push('Extract words from content','');
            $splitInWords = $this->procesWordsInArrays($this->contentParts);
            $this->log_pull();

            // Build the per-word index information
            $this->log_push('Analyse the extracted words','');
            $indexArr = $this->indexAnalyze($splitInWords);
            $this->log_pull();

            // Write the page record
            $this->log_push('Submitting page','');
            $this->submitPage();
            $this->log_pull();

            // Write the words
            $this->log_push('Check word list and submit words','');
            $this->checkWordList($indexArr);
            $this->submitWords($indexArr,$this->hash['phash']);
            $this->log_pull();

            // Store the time spent on this page
            $this->updateParsetime($this->hash['phash'],t3lib_div::milliseconds()-$Pstart);

            // Index files and URLs linked from the page
            $this->log_push('Checking external files','');
            if ($this->conf['index_externals']) {
                $this->extractLinks($this->conf['content']);
            }
            $this->log_pull();
        } else {
            // Same content is already indexed: refresh the meta data only
            $this->updateTstamp($this->hash['phash'],$this->conf['mtime']);
            $this->update_grlist($checkCHash['phash'],$this->hash['phash']);
            $this->updateRootline();
            $this->log_setTSlogMessage('Indexing not needed, the contentHash, '.$this->content_md5h.', has not changed. Timestamp, grlist and rootline updated if necessary.');
        }
    } else {
        $this->log_setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);
    }
}
00579
/**
 * Splits an HTML document into the parts title, keywords, description and body.
 *
 * Everything from "<body" onwards is the body; what precedes it is searched for the title
 * element and for meta tags whose name contains "keywords" or "description". The body is
 * reduced to the TYPO3SEARCH marker sections (if any), the elements listed in
 * $this->excludeSections are removed and all remaining tags are stripped.
 *
 * @param string $content HTML document
 * @return array Array with the keys of $this->defaultContentArray
 */
function splitHTMLContent($content) {

    // Divide head from body
    $parts = $this->defaultContentArray;
    $parts['body'] = stristr($content,'<body');
    $head = substr($content,0,-strlen($parts['body']));

    // Title: if it contains a colon, only the text after the first colon is used
    $this->embracingTags($head,'TITLE',$parts['title'],$unusedAfter,$unusedParams);
    $titleSplit = explode(':',$parts['title'],2);
    if (isset($titleSplit[1])) {
        $parts['title'] = trim($titleSplit[1]);
    } else {
        $parts['title'] = trim($titleSplit[0]);
    }

    // Collect the attribute strings of all meta tags; each hit is cut out of $head
    $metaTags = array();
    $n = 0;
    while ($this->embracingTags($head,'meta',$unusedContent,$head,$metaTags[$n])) {
        $n++;
    }

    // Pick keywords and description from the collected tags
    $n = 0;
    while (isset($metaTags[$n])) {
        $attribs = t3lib_div::get_tag_attributes($metaTags[$n]);
        if (stristr($attribs['name'],'keywords')) {
            $parts['keywords'].=','.$attribs['content'];
        }
        if (stristr($attribs['name'],'description')) {
            $parts['description'].=','.$attribs['content'];
        }
        $n++;
    }

    // Restrict the body to the TYPO3SEARCH sections, if markers are present
    $this->typoSearchTags($parts['body']);

    // Remove every occurrence of the excluded elements
    foreach (explode(',',$this->excludeSections) as $tag) {
        do {
            $removed = $this->embracingTags($parts['body'],$tag,$unusedContent,$parts['body'],$unusedParams);
        } while ($removed);
    }

    // A space before each tag keeps words apart once the tags are stripped
    $spacedBody = str_replace('<',' <',$parts['body']);
    $parts['body'] = trim(strip_tags($spacedBody));

    $parts['keywords'] = trim($parts['keywords']);
    $parts['description'] = trim($parts['description']);

    return $parts;
}
00626
/**
 * Extracts the charset from the "Content-Type" http-equiv meta tag of an HTML document.
 *
 * Uses preg_match() with the case-insensitive modifier; the POSIX eregi() function that was
 * used here before is no longer available in current PHP versions.
 *
 * @param string $content HTML document
 * @return string|NULL The charset as written in the tag, NULL if no such meta tag or charset was found
 */
function getHTMLcharset($content) {
    $metaPattern = '/<meta[[:space:]]+[^>]*http-equiv[[:space:]]*=[[:space:]]*["\']CONTENT-TYPE["\'][^>]*>/i';
    $charsetPattern = '/charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)/i';

    if (preg_match($metaPattern, $content, $reg)) {
        if (preg_match($charsetPattern, $reg[0], $reg2)) {
            return $reg2[1];
        }
    }

    return NULL;
}
00640
/**
 * Converts an HTML document to utf-8, including its character entities.
 *
 * @param string $content HTML document
 * @param string $charset Charset of the document; if empty it is detected with getHTMLcharset()
 * @return string Converted document
 */
function convertHTMLToUtf8($content,$charset='') {

    // Fall back to the charset declared in the document itself
    if (!$charset) {
        $charset = $this->getHTMLcharset($content);
    }
    $charset = $this->csObj->parse_charset($charset);

    // Convert unless the charset is unknown or already utf-8
    $needsConversion = $charset && $charset!=='utf-8';
    if ($needsConversion) {
        $content = $this->csObj->utf8_encode($content, $charset);
    }

    // Entities are converted in any case
    return $this->csObj->entities_to_utf8($content, TRUE);
}
00663
/**
 * Finds the first occurrence of a tag (matched case-insensitively) and splits the string around it.
 *
 * If a closing tag follows, $tagContent is the text between opening and closing tag and
 * $stringAfter is the input with the whole element cut out. Without a closing tag,
 * $tagContent is empty and $stringAfter is the text following the opening tag.
 *
 * @param string $string Text to search
 * @param string $tagName Tag name without brackets
 * @param string $tagContent Output: content of the element
 * @param string $stringAfter Output: remaining text (see above)
 * @param string $paramList Output: text between the tag name and the ">" of the opening tag
 * @return boolean TRUE if the opening tag was found, otherwise FALSE (outputs are left untouched)
 */
function embracingTags($string,$tagName,&$tagContent,&$stringAfter,&$paramList) {
    $opener = '<'.$tagName;
    $closer = '</'.$tagName.'>';

    $fromOpener = stristr($string,$opener);
    if (!$fromOpener) {
        return false;
    }

    // Separate the attributes of the opening tag from the text that follows it
    $pieces = explode('>',substr($fromOpener,strlen($opener)),2);
    $paramList = $pieces[0];
    $inner = $pieces[1];

    $fromCloser = stristr($inner,$closer);
    if (!$fromCloser) {
        $tagContent='';
        $stringAfter = $inner;
        return true;
    }

    $openerPos = strpos(strtolower($string), strtolower($opener));
    $stringBefore = substr($string, 0, $openerPos);
    $tagContent = substr($inner,0,strlen($inner)-strlen($fromCloser));
    $stringAfter = $stringBefore.substr($fromCloser,strlen($closer));

    return true;
}
00696
/**
 * Reduces $body to the sections selected by the TYPO3SEARCH_begin / TYPO3SEARCH_end comment markers.
 *
 * The text following a "begin" marker is kept. At an "end" marker the text remembered from the
 * preceding unmarked chunk is appended; in practice this is the text before the first marker
 * when that marker is an "end". If $body contains no marker at all it is left unchanged.
 *
 * @param string $body HTML body (passed by reference, modified in place)
 * @return boolean TRUE if at least one marker was found, otherwise FALSE
 */
function typoSearchTags(&$body) {
    $expBody = explode('<!--TYPO3SEARCH_',$body);

    if (count($expBody) <= 1) {
        return false;
    }

    $body = '';
    // Initialized up front so that an "end" chunk at the very start does not read an undefined variable.
    $prev = '';

    foreach ($expBody as $val) {
        $part = explode('-->',$val,2);
        $marker = trim($part[0]);

        if ($marker === 'begin') {
            // A marker without the closing "-->" has no text after it
            $body.= isset($part[1]) ? $part[1] : '';
            $prev = '';
        } elseif ($marker === 'end') {
            $body.= $prev;
        } else {
            $prev = $val;
        }
    }

    return true;
}
00725
/**
 * Finds the hyperlinks in the content and indexes what they point to.
 *
 * Links carrying a "jumpurl" query parameter are resolved to that parameter first. Links with a
 * scheme are indexed as external URLs when "indexExternalURLs" is enabled in the extension
 * configuration. Links without scheme and without query string are indexed as local files if
 * the file exists.
 *
 * @param string $content HTML content
 * @return void
 */
function extractLinks($content) {

    foreach ($this->extractHyperLinks($content) as $linkInfo) {

        // The href value was taken from markup, so entities are decoded first
        $linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']);
        $qParts = parse_url($linkSource);

        // Resolve jumpurl links to their real target
        if ($qParts['query'] && strstr($qParts['query'],'jumpurl=')) {
            parse_str($qParts['query'],$getP);
            $linkSource = $getP['jumpurl'];
            $qParts = parse_url($linkSource);
        }

        // A scheme means an external URL
        if ($qParts['scheme']) {
            if ($this->indexerConfig['indexExternalURLs']) {
                $this->indexExternalUrl($linkSource);
            }
            continue;
        }

        // Local links with a query string are not files
        if ($qParts['query']) {
            continue;
        }

        $localFile = t3lib_div::getFileAbsFileName(PATH_site.$linkSource);
        if ($localFile && @is_file($localFile)) {
            $this->indexRegularDocument($linkSource);
        }
    }
}
00767
/**
 * Collects all <a> tags that have a non-empty href attribute.
 *
 * @param string $string HTML content
 * @return array List of arrays with the keys "tag" (the tag as found) and "href" (its href value)
 */
function extractHyperLinks($string) {

    // The HTML parser is created on first use
    if (!is_object($this->htmlParser)) {
        $this->htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
    }

    $list = array();
    $pieces = $this->htmlParser->splitTags('a',$string);

    foreach ($pieces as $index => $piece) {
        // Only the odd entries of the split result are looked at
        if (!($index%2)) {
            continue;
        }

        $params = $this->htmlParser->get_tag_attributes($piece,1);
        $firstTagName = $this->htmlParser->getFirstTagName($piece);

        if (strtolower($firstTagName) == 'a' && $params[0]['href']) {
            $list[] = array(
                'tag' => $piece,
                'href' => $params[0]['href']
            );
        }
    }

    return $list;
}
00803
00804
00805
00806
00807
00808
00809
00810
00811
00812
00813
00814
00815
00816
00817
00818
00819
/**
 * Indexes an external URL if the server reports it as "text/html".
 *
 * The document is downloaded, written to a temporary file and passed to
 * indexRegularDocument() as an "html" file. The fetched content is also kept in
 * $this->indexExternalUrl_content.
 *
 * @param string $externalUrl URL to index
 * @return void
 */
function indexExternalUrl($externalUrl) {

    // Only HTML documents are indexed; getUrlHeaders() returns FALSE when the headers cannot be read
    $urlHeaders = $this->getUrlHeaders($externalUrl);
    if (!is_array($urlHeaders) || !isset($urlHeaders['Content-Type']) || !stristr($urlHeaders['Content-Type'],'text/html')) {
        return;
    }

    $content = $this->indexExternalUrl_content = t3lib_div::getUrl($externalUrl);
    if (!strlen($content)) {
        return;
    }

    // The content goes into "<temp name>.html".
    // NOTE(review): assumes t3lib_div::tempnam() creates the base file like PHP's tempnam() does - confirm.
    $tmpBase = t3lib_div::tempnam('EXTERNAL_URL');
    $tmpFile = $tmpBase.'.html';
    t3lib_div::writeFile($tmpFile, $content);

    // Index that file
    $this->indexRegularDocument($externalUrl, FALSE, $tmpFile, 'html');

    // Clean up both the ".html" file and the base file so that no temporary file is left behind
    if (@is_file($tmpFile)) {
        unlink($tmpFile);
    }
    if ($tmpBase && @is_file($tmpBase)) {
        unlink($tmpBase);
    }
}
00850
/**
 * Fetches the HTTP response headers of a URL with a plain HTTP/1.0 GET request.
 *
 * Only URLs with the scheme "http" (or without scheme) are handled. Reading stops at the first
 * empty line, so the response body is not transferred completely.
 *
 * @param string $url URL to request
 * @param integer $timeout Connection timeout in seconds
 * @return array|boolean Header values keyed by header name as sent by the server (the value is
 *                       everything after the first colon, untrimmed; lines without a colon such
 *                       as the status line get a NULL value), or FALSE on failure
 */
function getUrlHeaders($url, $timeout = 2) {
    $url = parse_url($url);

    // parse_url() returns FALSE for seriously malformed URLs
    if (!is_array($url)) {
        return FALSE;
    }

    $scheme = isset($url['scheme']) ? $url['scheme'] : '';
    if (!in_array($scheme,array('','http'))) return FALSE;

    // Without a host there is nothing to connect to
    if (!isset($url['host']) || !strlen($url['host'])) {
        return FALSE;
    }

    $port = (isset($url['port']) && $url['port'] > 0) ? $url['port'] : 80;
    $fp = fsockopen ($url['host'], $port, $errno, $errstr, $timeout);
    if (!$fp) {
        return FALSE;
    }

    // The request line needs a path; "http://example.org" has none, so "/" is requested
    $path = (isset($url['path']) && strlen($url['path'])) ? $url['path'] : '/';
    $query = (isset($url['query']) && $url['query']) ? '?'.$url['query'] : '';

    $msg = "GET ".$path.$query." HTTP/1.0\r\nHost: ".$url['host']."\r\n\r\n";
    fputs ($fp, $msg);

    // Read up to and including the empty line that ends the header block
    $d = '';
    while (!feof($fp)) {
        $line = fgets ($fp,2048);

        $d.=$line;
        if (!strlen(trim($line))) {
            break;
        }
    }
    fclose ($fp);

    // Split the header block into name => value pairs
    $headers = t3lib_div::trimExplode(chr(10),$d,1);
    $retVal = array();
    foreach($headers as $line) {
        $pair = explode(':', $line, 2);
        $retVal[$pair[0]] = isset($pair[1]) ? $pair[1] : NULL;
    }
    return $retVal;
}
00890
00891
00892
00893
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905
00906
00907
00908
/**
 * Indexes a document (file) with the external parser registered for its extension.
 *
 * The parser may divide the file into several parts; each part is checked and indexed on its
 * own. A part is processed when checkMtimeTstamp() returns a positive code or $force is set,
 * and only while fewer than $this->maxExternalFiles files have been indexed (unless forced).
 * If the content hash of a part is already known, only its timestamp is updated.
 * For every part a section record is added if none exists yet.
 *
 * @param string $file File reference; relative paths are resolved against PATH_site. For external URLs this is the URL.
 * @param boolean $force If set, indexing is done regardless of mtime, file limit and content hash
 * @param string $contentTmpFile File to read the content from instead of $file
 * @param string $altExtension Extension to use for selecting the parser instead of the one of $file
 * @return void
 */
function indexRegularDocument($file, $force=FALSE, $contentTmpFile='', $altExtension='') {

    // Extension selecting the parser
    $fI = pathinfo($file);
    $ext = $altExtension ? $altExtension : strtolower($fI['extension']);

    // Absolute path of the file to read
    if (!$contentTmpFile) {
        if (!t3lib_div::isAbsPath($file)) {
            $absFile = t3lib_div::getFileAbsFileName(PATH_site.$file);
        } else {
            $absFile = $file;
        }
        $absFile = t3lib_div::isAllowedAbsPath($absFile) ? $absFile : '';
    } else {
        $absFile = $contentTmpFile;
    }

    if ($absFile && @is_file($absFile)) {
        if ($this->external_parsers[$ext]) {
            $mtime = filemtime($absFile);
            $cParts = $this->fileContentParts($ext,$absFile);

            foreach($cParts as $cPKey) {
                $this->internal_log = array();
                $this->log_push('Index: '.str_replace('.','_',basename($file)).($cPKey?'#'.$cPKey:''),'');
                $Pstart = t3lib_div::milliseconds();
                $subinfo = array('key' => $cPKey);
                $phash_arr = $this->file_phash_arr = $this->setExtHashes($file,$subinfo);
                $check = $this->checkMtimeTstamp($mtime, $phash_arr['phash']);
                if ($check > 0 || $force) {
                    if ($check > 0) {
                        $this->log_setTSlogMessage('Indexing needed, reason: '.$this->reasons[$check],1);
                    } else {
                        $this->log_setTSlogMessage('Indexing forced by flag',1);
                    }

                    // The number of files indexed per run is limited
                    if ($this->externalFileCounter < $this->maxExternalFiles || $force) {

                        // Read the content parts of the file
                        $this->log_push('Split content','');
                        $contentParts = $this->readFileContent($ext,$absFile,$cPKey);
                        $this->log_pull();

                        if (is_array($contentParts)) {
                            // Content hash over all parts; implode() takes the separator first
                            // (the reversed legacy argument order is no longer accepted by current PHP versions).
                            $content_md5h = $this->md5inthash(implode('', $contentParts));

                            if ($this->checkExternalDocContentHash($phash_arr['phash_grouping'], $content_md5h) || $force) {

                                // Count this file against the limit
                                $this->externalFileCounter++;

                                // Split the parts into words
                                $this->log_push('Extract words from content','');
                                $splitInWords = $this->procesWordsInArrays($contentParts);
                                $this->log_pull();

                                // Build the per-word index information
                                $this->log_push('Analyse the extracted words','');
                                $indexArr = $this->indexAnalyze($splitInWords);
                                $this->log_pull();

                                // Write the file record; the modification time is used for the creation time as well
                                $this->log_push('Submitting page','');
                                $size = filesize($absFile);
                                $ctime = filemtime($absFile);
                                $this->submitFilePage($phash_arr,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts);
                                $this->log_pull();

                                // Write the words
                                $this->log_push('Check word list and submit words','');
                                $this->checkWordList($indexArr);
                                $this->submitWords($indexArr,$phash_arr['phash']);
                                $this->log_pull();

                                // Store the time spent on this part
                                $this->updateParsetime($phash_arr['phash'],t3lib_div::milliseconds()-$Pstart);
                            } else {
                                $this->updateTstamp($phash_arr['phash'],$mtime);
                                $this->log_setTSlogMessage('Indexing not needed, the contentHash, '.$content_md5h.', has not changed. Timestamp updated.');
                            }
                        } else $this->log_setTSlogMessage('Could not index file! Unsupported extension.');
                    } else $this->log_setTSlogMessage('The limit of '.$this->maxExternalFiles.' has already been exceeded, so no indexing will take place this time.');
                } else $this->log_setTSlogMessage('Indexing not needed, reason: '.$this->reasons[$check]);

                // Disabled in the original code: $this->submitFile_grlist($phash_arr['phash']);
                // Add a section record for the file if there is none for the current page
                $this->submitFile_section($phash_arr['phash']);
                $this->log_pull();
            }
        } else $this->log_setTSlogMessage('Indexing not possible; The extension "'.$ext.'" was not supported.');
    } else $this->log_setTSlogMessage('Indexing not possible; File "'.$absFile.'" not found or valid.');
}
01014
/**
 * Reads the content of a file (or of one part of it) through the external parser for its extension.
 *
 * @param string $ext File extension selecting the parser
 * @param string $absFile Absolute path of the file
 * @param mixed $cPKey Key of the file part to read, as delivered by fileContentParts()
 * @return mixed Whatever the parser's readFileContent() returns, or NULL if no parser object is registered for the extension
 */
function readFileContent($ext,$absFile,$cPKey) {

    // Defined up front so that a missing parser yields NULL instead of an undefined variable
    $contentArr = NULL;

    if (is_object($this->external_parsers[$ext])) {
        $contentArr = $this->external_parsers[$ext]->readFileContent($ext,$absFile,$cPKey);
    }

    return $contentArr;
}
01033
/**
 * Asks the external parser for the parts a file is divided into.
 *
 * @param string $ext File extension selecting the parser
 * @param string $absFile Absolute path of the file
 * @return mixed Result of the parser's fileContentParts(); array(0) if no parser object is registered for the extension
 */
function fileContentParts($ext,$absFile) {

    // Without a parser object the file counts as one single part with the key 0
    if (!is_object($this->external_parsers[$ext])) {
        return array(0);
    }

    return $this->external_parsers[$ext]->fileContentParts($ext,$absFile);
}
01051
/**
 * Wraps plain content into a content parts array: the content becomes the body,
 * all other parts keep the values of $this->defaultContentArray.
 *
 * @param string $content Content to use as body
 * @return array Content parts array
 */
function splitRegularContent($content) {
    $parts = $this->defaultContentArray;
    $parts['body'] = $content;

    return $parts;
}
01065
01066
01067
01068
01069
01070
01071
01072
01073
01074
01075
01076
01077
01078
01079
01080
01081
01082
01083
01084
/**
 * Converts every non-empty entry of the content array to utf-8 and resolves its entities.
 *
 * @param array $contentArr Content parts (passed by reference, converted in place)
 * @param string $charset Charset of the content; no charset conversion is done if it is exactly "utf-8"
 * @return void
 */
function charsetEntity2utf8(&$contentArr, $charset) {

    // foreach works on a copy, so writing to $contentArr[$key] inside the loop is safe.
    // It replaces the each() loop, which current PHP versions no longer provide.
    foreach ($contentArr as $key => $unused) {
        if (!strlen($contentArr[$key])) {
            continue;
        }

        if ($charset!=='utf-8') {
            $contentArr[$key] = $this->csObj->utf8_encode($contentArr[$key], $charset);
        }

        // Entities are decoded to utf-8 in any case
        $contentArr[$key] = $this->csObj->entities_to_utf8($contentArr[$key],TRUE);
    }
}
01108
/**
 * Splits every content part into words using the lexer.
 * Duplicate words are removed from title, keywords and description; the body keeps all of them.
 *
 * @param array $contentArr Content parts as strings
 * @return array Content parts, each one as an array of words
 */
function procesWordsInArrays($contentArr) {

    // Split all parts into words.
    // foreach replaces the each() loop, which current PHP versions no longer provide.
    foreach ($contentArr as $key => $text) {
        $contentArr[$key] = $this->lexerObj->split2Words($text);
    }

    // Words in the header parts are only counted once
    $contentArr['title'] = array_unique($contentArr['title']);
    $contentArr['keywords'] = array_unique($contentArr['keywords']);
    $contentArr['description'] = array_unique($contentArr['description']);

    return $contentArr;
}
01131
/**
 * Creates the description text of a document from the beginning of its body.
 *
 * The maximum length comes from conf[index_descrLgd], passed through
 * t3lib_div::intInRange() with the arguments 0, 255 and 200.
 *
 * @param array $contentArr Content parts; only "body" is used
 * @return string Truncated body text; an empty string if the resulting length limit is 0
 */
function bodyDescription($contentArr) {

    // Defined up front so that a length limit of 0 yields an empty description instead of an undefined variable
    $bodyDescription = '';

    $maxL = t3lib_div::intInRange($this->conf['index_descrLgd'],0,255,200);
    if ($maxL) {
        // Replace the listed characters with a single space each
        $bodyDescription = str_replace(array(' ',"\t","\r","\n"),' ',$contentArr['body']);

        // Truncate as utf-8
        $bodyDescription = $this->csObj->strtrunc('utf-8', $bodyDescription, $maxL);
    }

    return $bodyDescription;
}
01153
/**
 * Builds the index array (one entry per word) from the word lists of all content parts.
 * Title, keywords and description words are registered with the bit offsets 7, 6 and 5,
 * then the body words are added.
 *
 * @param array $content Content parts as word arrays (see procesWordsInArrays())
 * @return array Index array keyed by word
 */
function indexAnalyze($content) {
    $words = array();

    // Header parts, each with its own bit offset
    $this->analyzeHeaderinfo($words,$content,'title',7);
    $this->analyzeHeaderinfo($words,$content,'keywords',6);
    $this->analyzeHeaderinfo($words,$content,'description',5);

    // Body words
    $this->analyzeBody($words,$content);

    return $words;
}
01171
/**
 * Registers the words of one header part (title, keywords or description) in the index array.
 *
 * For every word (cut to 60 bytes) the bit 2^$offset is set in "cmp", "count" is increased and
 * "hash" and "metaphone" are (re)calculated. $this->wordcount is increased per word.
 *
 * @param array $retArr Index array keyed by word (passed by reference)
 * @param array $content Content parts as word arrays
 * @param string $key Name of the part to process
 * @param integer $offset Bit position marking this part in "cmp"
 * @return void
 */
function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {
    // The bit flag is the same for every word of this part
    $flag = pow(2,$offset);

    // foreach replaces the each() loop, which current PHP versions no longer provide.
    foreach ($content[$key] as $val) {
        $val = substr($val,0,60);

        // Words seen for the first time start with no flags and a count of 0
        $cmp = isset($retArr[$val]['cmp']) ? $retArr[$val]['cmp'] : 0;
        $count = isset($retArr[$val]['count']) ? $retArr[$val]['count'] : 0;

        $retArr[$val]['cmp'] = $cmp|$flag;
        $retArr[$val]['count'] = $count+1;
        $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
        $retArr[$val]['metaphone'] = $this->metaphone($val);
        $this->wordcount++;
    }
}
01192
/**
 * Registers the body words in the index array.
 *
 * A word not yet present gets its first position, hash and metaphone value; for every
 * occurrence "count" is increased, as is $this->wordcount. Words are cut to 60 bytes.
 *
 * @param array $retArr Index array keyed by word (passed by reference)
 * @param array $content Content parts as word arrays; only "body" is used
 * @return void
 */
function analyzeBody(&$retArr,$content) {
    foreach ($content['body'] as $position => $word) {
        $word = substr($word,0,60);

        // First occurrence: remember where the word appeared and calculate its hashes
        if (!isset($retArr[$word])) {
            $retArr[$word]['first'] = $position;
            $retArr[$word]['hash'] = hexdec(substr(md5($word),0,7));
            $retArr[$word]['metaphone'] = $this->metaphone($word);
        }

        $retArr[$word]['count'] = $retArr[$word]['count']+1;
        $this->wordcount++;
    }
}
01212
/**
 * Calculates the metaphone value of a word, using the configured metaphone object if there is
 * one and PHP's metaphone() otherwise.
 *
 * @param string $word Word to process
 * @param boolean $retRaw If set, the metaphone string itself is returned
 * @return mixed The raw metaphone value if $retRaw is set; otherwise an integer made from the first 7 hex digits of its md5 hash, or 0 if the metaphone value is empty
 */
function metaphone($word,$retRaw=FALSE) {

    // Custom implementation (gets the language uid as well) or the PHP function
    $phonetic = is_object($this->metaphoneObj)
        ? $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid'])
        : metaphone($word);

    if ($retRaw) {
        return $phonetic;
    }

    // An empty metaphone value is stored as 0, everything else as integer hash
    if ($phonetic=='') {
        return 0;
    }
    return hexdec(substr(md5($phonetic),0,7));
}
01235
01236
01237
01238
01239
01240
01241
01242
01243
01244
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254
01255
01256
/**
 * Writes the index records of the current page.
 *
 * Existing records for the page hash are removed first. Then one record is inserted into
 * index_phash, a section and a group list record are added, the full text is stored in
 * index_fulltext and, in debug mode, debug information is written to index_debug.
 *
 * @return void
 */
function submitPage() {

    // Remove what has been stored for this page hash before
    $this->removeOldIndexedPages($this->hash['phash']);

    // Main record of the page
    $pageRecord = array(
        'phash' => $this->hash['phash'],
        'phash_grouping' => $this->hash['phash_grouping'],
        'cHashParams' => serialize($this->cHashParams),
        'contentHash' => $this->content_md5h,
        'data_page_id' => $this->conf['id'],
        'data_page_reg1' => $this->conf['page_cache_reg1'],
        'data_page_type' => $this->conf['type'],
        'data_page_mp' => $this->conf['MP'],
        'gr_list' => $this->conf['gr_list'],
        'item_type' => 0,
        'item_title' => $this->contentParts['title'],
        'item_description' => $this->bodyDescription($this->contentParts),
        'item_mtime' => $this->conf['mtime'],
        'item_size' => strlen($this->conf['content']),
        'tstamp' => time(),
        'crdate' => time(),
        'item_crdate' => $this->conf['crdate'],
        'sys_language_uid' => $this->conf['sys_language_uid'],
        'externalUrl' => 0,
        'recordUid' => intval($this->conf['recordUid']),
        'freeIndexUid' => intval($this->conf['freeIndexUid']),
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $pageRecord);

    // Section (rootline) record
    $this->submit_section($this->hash['phash'],$this->hash['phash']);

    // Group list record
    $this->submit_grlist($this->hash['phash'],$this->hash['phash']);

    // Full text of all content parts
    $fulltextRecord = array(
        'phash' => $this->hash['phash'],
        'fulltextdata' => implode(' ', $this->contentParts)
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fulltextRecord);

    // Debug information is only stored in debug mode
    if (!$this->indexerConfig['debugMode']) {
        return;
    }

    // Content and body are cut to 1000 bytes for the debug record
    $debugInfo = array(
        'cHashParams' => $this->cHashParams,
        'external_parsers initialized' => array_keys($this->external_parsers),
        'conf' => array_merge($this->conf,array('content'=>substr($this->conf['content'],0,1000))),
        'contentParts' => array_merge($this->contentParts,array('body' => substr($this->contentParts['body'],0,1000))),
        'logs' => $this->internal_log,
        'lexer' => $this->lexerObj->debugString,
    );
    $debugRecord = array(
        'phash' => $this->hash['phash'],
        'debuginfo' => serialize($debugInfo)
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $debugRecord);
}
01322
/**
 * Inserts a group list record for the current conf[gr_list] into index_grlist.
 *
 * @param mixed $hash Value for the "phash" field
 * @param mixed $phash_x Value for the "phash_x" field
 * @return void
 */
function submit_grlist($hash,$phash_x) {
    $grList = $this->conf['gr_list'];

    // The list is stored together with its integer hash
    $record = array(
        'phash' => $hash,
        'phash_x' => $phash_x,
        'hash_gr_list' => $this->md5inthash($grList),
        'gr_list' => $grList
    );

    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $record);
}
01342
/**
 * Inserts a section record for the current page (conf[id]) into index_section.
 * getRootLineFields() is given the record before the insert so it can add its fields.
 *
 * @param mixed $hash Value for the "phash" field
 * @param mixed $hash_t3 Value for the "phash_t3" field
 * @return void
 */
function submit_section($hash,$hash_t3) {
    $record = array(
        'phash' => $hash,
        'phash_t3' => $hash_t3,
        'page_id' => intval($this->conf['id'])
    );

    // Receives the record by reference (assumed from the call; the method is not visible here)
    $this->getRootLineFields($record);

    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $record);
}
01362
/**
 * Deletes all index records of a page hash from index_phash, index_section, index_grlist,
 * index_fulltext and index_debug, plus the section records that refer to it through "phash_t3".
 *
 * @param mixed $phash Page hash; cast to integer for the queries
 * @return void
 */
function removeOldIndexedPages($phash) {
    $phashValue = intval($phash);

    // Records stored under the hash itself
    $tables = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
    foreach ($tables as $tableName) {
        $GLOBALS['TYPO3_DB']->exec_DELETEquery($tableName, 'phash='.$phashValue);
    }

    // Section records pointing to this page through phash_t3
    $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.$phashValue);
}
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
/**
 * Writes the index records of a file (or of one part of a file).
 *
 * Existing records for the hash are removed first. Then one record is inserted into
 * index_phash, the full text is stored in index_fulltext and, in debug mode, debug
 * information is written to index_debug.
 *
 * @param array $hash Hashes of the file with the keys "phash" and "phash_grouping"
 * @param string $file File reference or URL, stored as "data_filename"
 * @param array $subinfo Information about the file part, stored serialized as "cHashParams"
 * @param string $ext Extension selecting the parser
 * @param mixed $mtime Modification time
 * @param mixed $ctime Creation time
 * @param mixed $size File size
 * @param mixed $content_md5h Content hash
 * @param array $contentParts Content parts of the file
 * @return void
 */
function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts) {

    // Item type: taken from the parser's map, with the extension itself as fallback
    $storeItemType = $this->external_parsers[$ext]->ext2itemtype_map[$ext];
    if (!$storeItemType) {
        $storeItemType = $ext;
    }

    // Remove what has been stored for this hash before
    $this->removeOldIndexedFiles($hash['phash']);

    // A scheme in the file reference marks an external URL
    $fileParts = parse_url($file);

    // The file name serves as title if the document has none
    $itemTitle = trim($contentParts['title']) ? $contentParts['title'] : basename($file);

    // Main record of the file
    $fileRecord = array(
        'phash' => $hash['phash'],
        'phash_grouping' => $hash['phash_grouping'],
        'cHashParams' => serialize($subinfo),
        'contentHash' => $content_md5h,
        'data_filename' => $file,
        'item_type' => $storeItemType,
        'item_title' => $itemTitle,
        'item_description' => $this->bodyDescription($contentParts),
        'item_mtime' => $mtime,
        'item_size' => $size,
        'item_crdate' => $ctime,
        'tstamp' => time(),
        'crdate' => time(),
        'gr_list' => $this->conf['gr_list'],
        'externalUrl' => $fileParts['scheme'] ? 1 : 0,
        'recordUid' => intval($this->conf['recordUid']),
        'freeIndexUid' => intval($this->conf['freeIndexUid']),
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fileRecord);

    // Full text of all content parts
    $fulltextRecord = array(
        'phash' => $hash['phash'],
        'fulltextdata' => implode(' ', $contentParts)
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fulltextRecord);

    // Debug information is only stored in debug mode
    if (!$this->indexerConfig['debugMode']) {
        return;
    }

    // The body is cut to 1000 bytes for the debug record
    $debugInfo = array(
        'cHashParams' => $subinfo,
        'contentParts' => array_merge($contentParts,array('body' => substr($contentParts['body'],0,1000))),
        'logs' => $this->internal_log,
        'lexer' => $this->lexerObj->debugString,
    );
    $debugRecord = array(
        'phash' => $hash['phash'],
        'debuginfo' => serialize($debugInfo)
    );
    $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $debugRecord);
}
01467
/**
 * Adds a group list record for a file unless one already exists for either the default
 * group list or the current conf[gr_list].
 *
 * @param mixed $hash Hash of the file; cast to integer for the query
 * @return void
 */
function submitFile_grlist($hash) {

    // Look for a record matching the default or the current group list
    $where = 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')';
    $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', $where);

    if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
        return;
    }

    $this->submit_grlist($hash,$hash);
}
01481
/**
 * Adds a section record connecting a file to the current page (conf[id]) unless such a record exists.
 *
 * @param mixed $hash Hash of the file; cast to integer for the query
 * @return void
 */
function submitFile_section($hash) {

    // Is the file already registered for this page?
    $where = 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']);
    $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', $where);

    if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
        return;
    }

    // The hash of the current page goes into the phash_t3 field
    $this->submit_section($hash,$this->hash['phash']);
}
01495
/**
 * Deletes the index records of a file identified by its phash.
 *
 * Rows are removed from index_phash, index_grlist, index_fulltext and
 * index_debug (in that order).
 *
 * @param	integer		phash value of the file to remove
 * @return	void
 */
function removeOldIndexedFiles($phash) {
	// The same condition applies to every table, so build it once.
	$where = 'phash='.intval($phash);

	$tables = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
	foreach ($tables as $tableName) {
		$GLOBALS['TYPO3_DB']->exec_DELETEquery($tableName, $where);
	}
}
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
/**
 * Decides whether a document needs to be (re-)indexed, based on its mtime and
 * the timestamp of its existing index_phash record.
 *
 * Return codes (positive = index the document, negative = leave it alone);
 * they correspond to the keys of $this->reasons:
 *    4: no index_phash record exists for the phash
 *    1: tstamp_maxAge is set and has been exceeded
 *   -2: tstamp_minAge is set and has not been exceeded yet
 *    3: min-age check passed, but no mtime was given
 *    2: min-age check passed and the stored item_mtime differs from $mtime
 *   -1: min-age check passed and the stored item_mtime equals $mtime
 *
 * In the -1 case the record's tstamp is refreshed via updateTstamp() unless
 * tstamp_maxAge is set; either way a message is logged.
 *
 * @param	integer		mtime value to compare with the indexed one
 * @param	integer		phash value identifying the index_phash record
 * @return	integer		Result code, see list above
 */
function checkMtimeTstamp($mtime,$phash) {
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
	$row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res);

	// Never indexed before.
	if (!$row) {
		return 4;
	}

	// Existing record is older than the configured maximum age.
	if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time()) {
		return 1;
	}

	// A minimum age is configured and the record is still younger than that.
	if ($this->tstamp_minAge && !(($row['tstamp']+$this->tstamp_minAge) < time())) {
		return -2;
	}

	// Without an mtime there is nothing to compare against.
	if (!$mtime) {
		return 3;
	}

	// The document changed since it was indexed.
	if ($row['item_mtime'] != $mtime) {
		return 2;
	}

	// Unchanged document: only refresh the timestamp when no max-age is in effect.
	if ($this->tstamp_maxAge) {
		$this->log_setTSlogMessage('Mtime matched, timestamp NOT updated because a maxAge is set ('.($row['tstamp'] + $this->tstamp_maxAge - time()).' seconds to expire time).',1);
	} else {
		$this->updateTstamp($phash);
		$this->log_setTSlogMessage('Mtime matched, timestamp updated.',1);
	}

	return -1;
}
01568
/**
 * Looks for an index_phash record that shares the current phash_grouping
 * ($this->hash['phash_grouping']) and content hash ($this->content_md5h).
 *
 * @return	mixed		The matching row (array with the key 'phash') if one exists, otherwise the integer 1
 */
function checkContentHash() {
	$where = 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h);

	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', $where);
	$match = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res);

	return $match ? $match : 1;
}
01582
/**
 * Tells whether an index_phash record with the given phash_grouping and
 * content hash is absent.
 *
 * @param	integer		phash_grouping value to look for
 * @param	integer		Content hash to look for
 * @return	integer		0 if a matching record exists, 1 if none was found
 */
function checkExternalDocContentHash($hashGr,$content_md5h) {
	$where = 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', $where);

	return $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res) ? 0 : 1;
}
01598
/**
 * Counts the index_grlist rows registered for a phash_x value.
 *
 * @param	integer		phash_x value to look up
 * @return	integer		Number of matching rows (0 when none exist)
 */
function is_grlist_set($phash_x) {
	$where = 'phash_x='.intval($phash_x);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', $where);
	$rowCount = $GLOBALS['TYPO3_DB']->sql_num_rows($res);

	return $rowCount;
}
01609
/**
 * Adds the configured group list to index_grlist for a phash, unless a row
 * with that phash and the hash of $this->conf['gr_list'] already exists.
 * A message is logged when an entry is inserted.
 *
 * @param	integer		phash value to check and register
 * @param	integer		phash_x value passed on to submit_grlist()
 * @return	void
 */
function update_grlist($phash,$phash_x) {
	$where = 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', $where);

	if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
		// The group list is already registered for this phash.
		return;
	}

	$this->submit_grlist($phash, $phash_x);
	$this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
}
01625
/**
 * Sets the tstamp of an index_phash record to the current time and, when an
 * mtime is given, stores it as item_mtime as well.
 *
 * @param	integer		phash value of the record to update
 * @param	integer		Optional mtime; only written when non-zero
 * @return	void
 */
function updateTstamp($phash,$mtime=0) {
	$changes = array();
	$changes['tstamp'] = time();

	if ($mtime) {
		$changes['item_mtime'] = intval($mtime);
	}

	$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $changes);
}
01641
/**
 * Stores the parse time in the index_phash record of a phash.
 *
 * @param	integer		phash value of the record to update
 * @param	integer		Parse time; cast to integer before it is written
 * @return	void
 */
function updateParsetime($phash,$parsetime) {
	$where = 'phash='.intval($phash);

	$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', $where, array(
		'parsetime' => intval($parsetime)
	));
}
01656
/**
 * Rewrites the rootline fields of all index_section rows belonging to the
 * current page ($this->conf['id']), using the values that
 * getRootLineFields() provides.
 *
 * @return	void
 */
function updateRootline() {
	$rootlineFields = array();
	$this->getRootLineFields($rootlineFields);

	$where = 'page_id='.intval($this->conf['id']);
	$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', $where, $rootlineFields);
}
01669
/**
 * Fills in the rootline fields of a field array.
 *
 * Sets 'rl0', 'rl1' and 'rl2' from entries 0-2 of $this->conf['rootline_uids'].
 * When $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['addRootLineFields'] is
 * an array, each of its "field name => rootline level" pairs adds one more
 * field taken from that level. All values are cast to integer.
 *
 * @param	array		Field array, passed by reference and extended in place
 * @return	void
 */
function getRootLineFields(&$fieldArr) {
		// Fixed fields rl0..rl2
	for ($level = 0; $level <= 2; $level++) {
		$fieldArr['rl'.$level] = intval($this->conf['rootline_uids'][$level]);
	}

		// Additional fields, if configured
	if (!is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) {
		return;
	}
	foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $extraField => $extraLevel) {
		$fieldArr[$extraField] = intval($this->conf['rootline_uids'][$extraLevel]);
	}
}
01689
/**
 * Removes indexed pages that share the current phash_grouping and content
 * hash but are registered under a group list other than the default one.
 *
 * Every matching phash is logged and then handed to removeOldIndexedPages().
 *
 * @return	void
 */
function removeLoginpagesWithContentHash() {
	$conditions = array(
		'A.phash=B.phash',
		'AND A.phash_grouping='.intval($this->hash['phash_grouping']),
		'AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList),
		'AND A.contentHash='.intval($this->content_md5h),
	);
		// The conditions are joined by line breaks, with one leading line break.
	$where = "\n".implode("\n", $conditions);

	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', $where);
	while ($duplicate = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$duplicate['phash']."' are now removed.",1);
		$this->removeOldIndexedPages($duplicate['phash']);
	}
}
01707
01708
01709
01710
01711
01712
01713
01714
01715
01716
01717
01718
01719
01720
01721
01722
01723
01724
/**
 * Adds the words of a word list that are not yet present in index_words.
 *
 * The 'hash' values of all entries are looked up as 'wid' in index_words.
 * If the number of rows found differs from the number of words, every word
 * whose base word came back from the lookup is dropped from the list and the
 * remaining words are inserted (wid, baseword, metaphone).
 *
 * @param	array		Word list keyed by base word; each entry is an array with at least the keys 'hash' and 'metaphone'
 * @return	void
 */
function checkWordList($wl) {
		// Collect the word ids. Plain foreach loops are used instead of
		// each(), which is deprecated since PHP 7.2 and removed in PHP 8.0.
	$phashArr = array();
	foreach ($wl as $wordData) {
		$phashArr[] = $wordData['hash'];
	}
	if (!count($phashArr)) {
		return;
	}

	$cwl = implode(',',$phashArr);
	$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');

	$knownWordCount = $GLOBALS['TYPO3_DB']->sql_num_rows($res);
	if ($knownWordCount == count($wl)) {
			// All words are in the table already.
		return;
	}

	$this->log_setTSlogMessage('Inserting words: '.(count($wl)-$knownWordCount),1);

		// Drop the words the table already knows ...
	while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
		unset($wl[$row['baseword']]);
	}

		// ... and insert what is left.
	foreach ($wl as $key => $val) {
		$insertFields = array(
			'wid' => $val['hash'],
			'baseword' => $key,
			'metaphone' => $val['metaphone']
		);

		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
	}
}
01760
/**
 * Replaces the word relations of a document in index_rel.
 *
 * All index_rel rows of the phash are deleted first; then one row is inserted
 * per word, carrying its id, count, first position, mapped frequency
 * (count divided by $this->wordcount, run through freqMap()) and its 'cmp'
 * value masked with $this->flagBitMask.
 *
 * @param	array		Word list; each entry is an array with the keys 'hash', 'count', 'first' and 'cmp'
 * @param	integer		phash value of the document
 * @return	void
 */
function submitWords($wl,$phash) {
		// Clear the relations stored earlier for this document.
	$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));

	foreach ($wl as $wordData) {
		$relativeFrequency = $wordData['count']/$this->wordcount;

		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', array(
			'phash' => $phash,
			'wid' => $wordData['hash'],
			'count' => $wordData['count'],
			'first' => $wordData['first'],
			'freq' => $this->freqMap($relativeFrequency),
			'flags' => ($wordData['cmp'] & $this->flagBitMask)
		));
	}
}
01784
/**
 * Maps a relative word frequency onto the range 0..$this->freqRange.
 *
 * Frequencies up to and including 1 are multiplied by
 * $this->freqMax * 100 * $this->freqRange and capped at $this->freqRange.
 * The boundary value 1 (a word that makes up the whole document) belongs to
 * this branch; treating it as "above 1" divided it by the map factor and
 * produced a value close to zero for the most frequent word possible.
 * Values above 1 are still divided by the map factor, as before.
 *
 * @param	float		Frequency (word count divided by total word count)
 * @return	mixed		Mapped frequency (float, or the integer $this->freqRange when capped)
 */
function freqMap($freq) {
	$mapFactor = $this->freqMax*100*$this->freqRange;

	if ($freq <= 1) {
		$newFreq = $freq*$mapFactor;
			// Never exceed the upper end of the range.
		if ($newFreq > $this->freqRange) {
			$newFreq = $this->freqRange;
		}
	} else {
		$newFreq = $freq/$mapFactor;
	}

	return $newFreq;
}
01803
01804
01805
01806
01807
01808
01809
01810
01811
01812
01813
01814
01815
01816
01817
01818
01819
/**
 * Computes the two hashes of the current page and stores them in $this->hash.
 *
 * 'phash_grouping' is the integer hash of the serialized array of page id,
 * type, sys_language_uid, MP and the cHash parameters. 'phash' is the hash of
 * the same array extended with the group list, so it additionally depends on
 * $this->conf['gr_list'].
 *
 * @return	void
 */
function setT3Hashes() {
		// Parameters shared by both hashes; the key order matters for serialize().
	$hashParams = array();
	$hashParams['id'] = intval($this->conf['id']);
	$hashParams['type'] = intval($this->conf['type']);
	$hashParams['sys_lang'] = intval($this->conf['sys_language_uid']);
	$hashParams['MP'] = strval($this->conf['MP']);
	$hashParams['cHash'] = $this->cHashParams;

		// Grouping hash: independent of the group list
	$this->hash['phash_grouping'] = $this->md5inthash(serialize($hashParams));

		// Page hash: the group list is added as the last element
	$hashParams['gr_list'] = strval($this->conf['gr_list']);
	$this->hash['phash'] = $this->md5inthash(serialize($hashParams));
}
01843
/**
 * Computes the two hashes of an external file.
 *
 * 'phash_grouping' is the integer hash of the serialized array holding only
 * the file reference; 'phash' is the hash of that array extended with the
 * sub-information.
 *
 * @param	string		File reference
 * @param	array		Additional sub-information that distinguishes the document
 * @return	array		Array with the keys 'phash_grouping' and 'phash'
 */
function setExtHashes($file,$subinfo=array()) {
	$groupingParams = array('file' => $file);
	$documentParams = array('file' => $file, 'subinfo' => $subinfo);

	return array(
		'phash_grouping' => $this->md5inthash(serialize($groupingParams)),
		'phash' => $this->md5inthash(serialize($documentParams))
	);
}
01867
/**
 * Turns a string into an integer hash: the first 7 hexadecimal digits of its
 * md5 sum, converted to a decimal number.
 *
 * @param	string		String to hash
 * @return	integer		Integer value of the first 7 hex digits of the md5 sum
 */
function md5inthash($str) {
	$hexDigest = md5($str);
	$leadingDigits = substr($hexDigest, 0, 7);

	return hexdec($leadingDigits);
}
01878
/**
 * Builds a hash value from an array of parameters.
 *
 * The array is imploded to a query string with t3lib_div::implodeArrayForUrl(),
 * passed through t3lib_div::cHashParams(), and the serialized result is
 * hashed with t3lib_div::shortMD5().
 *
 * @param	array		Parameter array
 * @return	string		Return value of t3lib_div::shortMD5() for the serialized parameters
 */
function makeCHash($paramArray) {
	$queryString = t3lib_div::implodeArrayForUrl('', $paramArray);
	$serializedParams = serialize(t3lib_div::cHashParams($queryString));

	return t3lib_div::shortMD5($serializedParams);
}
01892
01893
01894
01895
01896
01897
01898
01899
01900
01901
01902
01903
01904
01905
01906
01907
01908
01909
/**
 * Forwards a push() call to the object in $GLOBALS['TT'].
 * Does nothing when $GLOBALS['TT'] is not an object.
 *
 * @param	string		Message, passed on as first argument
 * @param	string		Key, passed on as second argument
 * @return	void
 */
function log_push($msg,$key) {
	if (!is_object($GLOBALS['TT'])) {
		return;
	}

	$GLOBALS['TT']->push($msg, $key);
}
01920
/**
 * Forwards a pull() call to the object in $GLOBALS['TT'].
 * Does nothing when $GLOBALS['TT'] is not an object.
 *
 * @return	void
 */
function log_pull() {
	if (!is_object($GLOBALS['TT'])) {
		return;
	}

	$GLOBALS['TT']->pull();
}
01929
/**
 * Records a log message.
 *
 * The message is always appended to $this->internal_log. When $GLOBALS['TT']
 * is an object, the message and error number are passed to its
 * setTSlogMessage() method first.
 *
 * @param	string		Message text
 * @param	integer		Error number, passed on to $GLOBALS['TT']->setTSlogMessage()
 * @return	void
 */
function log_setTSlogMessage($msg, $errorNum=0) {
	$hasTimeTracker = is_object($GLOBALS['TT']);
	if ($hasTimeTracker) {
		$GLOBALS['TT']->setTSlogMessage($msg, $errorNum);
	}

		// The internal log is kept whether or not $GLOBALS['TT'] is available.
	$this->internal_log[] = $msg;
}
01941
01942
01943
01944
01945
01946
01947
01948
01949
01950
01951
01952
01953
01954
/**
 * Disables acquiring cached page data when the crawler runs a re-index
 * instruction.
 *
 * Acts only if the 'crawler' extension is loaded, the crawler data in
 * $params['pObj']->applicationData['tx_crawler'] is flagged as running and its
 * processing instructions contain 'tx_indexedsearch_reindex'. In that case a
 * line with the previous value is appended to the crawler log and
 * $params['disableAcquireCacheData'] is set to TRUE.
 *
 * @param	array		Parameter array, passed by reference; 'pObj' and 'disableAcquireCacheData' are used
 * @param	object		Second hook argument; not used here
 * @return	void
 */
function fe_headerNoCache(&$params, $ref) {
		// Requirement 1: the crawler extension is installed
	if (!t3lib_extMgm::isLoaded('crawler')) {
		return;
	}
		// Requirement 2: the crawler is currently running
	if (!$params['pObj']->applicationData['tx_crawler']['running']) {
		return;
	}
		// Requirement 3: re-indexing was requested
	if (!in_array('tx_indexedsearch_reindex', $params['pObj']->applicationData['tx_crawler']['parameters']['procInstructions'])) {
		return;
	}

		// Log the previous state, then force the page to be generated again.
	$params['pObj']->applicationData['tx_crawler']['log'][] = 'RE_CACHE (indexed), old status: '.$params['disableAcquireCacheData'];
	$params['disableAcquireCacheData'] = TRUE;
}
01976 }
01977
01978
// Include the XCLASS extension file registered for this script, if any.
// Nothing happens when TYPO3_MODE is undefined or no file is configured.
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php']) {
		include_once $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.indexer.php'];
	}
}
01982 ?>