"TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/class.crawler.php Source File", "datetime" => "Sat Dec 2 19:22:32 2006", "date" => "2 Dec 2006", "doxygenversion" => "1.4.6", "projectname" => "TYPO3 4.0.1", "projectnumber" => "4.0.1" ); get_header($doxygen_vars); ?>
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

# To make sure the backend charset is available:
require_once(PATH_site.TYPO3_mainDir.'sysext/lang/lang.php');
if (!is_object($GLOBALS['LANG']))	{
	$GLOBALS['LANG'] = t3lib_div::makeInstance('language');
	$GLOBALS['LANG']->init($GLOBALS['BE_USER']->uc['lang']);
}


/**
 * Crawler callback class for indexing configurations ("index_config" records).
 *
 * crawler_init() starts due configurations by adding a first queue entry,
 * crawler_execute() processes one queue entry and may add follow-up entries,
 * and processDatamap_afterDatabaseOperations() re-indexes single records when
 * they are changed.
 */
class tx_indexedsearch_crawler {

	// Static:
	var $secondsPerExternalUrl = 3;		// Number of seconds to use as interval between queued indexing operations of URLs / files (types 2 & 3)

	// Internal, dynamic:
	var $instanceCounter = 0;		// Counts up for each added URL (type 3)
	var $pObj;						// Parent object passed to crawler_execute(); only set for custom hook types so addQueueEntryForHook() can reach it

	// Internal, static:
	var $callBack = 'EXT:indexed_search/class.crawler.php:&tx_indexedsearch_crawler';		// The object reference to this class.

	/**
	 * Starts every indexing configuration that is due and adds its first queue entry.
	 *
	 * A configuration is due when it is not hidden, its start time has passed,
	 * "timer_next_indexing" lies in the past and it is not already running (set_id=0).
	 * Finished runs are cleaned up at the end.
	 *
	 * @param	object		Parent object; must provide addQueueEntry_callBack()
	 * @return	void
	 */
	function crawler_init(&$pObj)	{
		$now = time();

		// Select all indexing configuration which are waiting to be activated:
		$indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'*',
			'index_config',
			'hidden=0
				AND (starttime=0 OR starttime<='.$now.')
				AND timer_next_indexing<'.$now.'
				AND set_id=0
				'.t3lib_BEfunc::deleteClause('index_config')
		);

		// For each configuration, check if it should be executed and if so, start:
		if (is_array($indexingConfigurations))	{
			foreach($indexingConfigurations as $cfgRec)	{

				// Generate a unique set-ID:
				$setId = t3lib_div::md5int(microtime());

				// Get next time:
				$nextTime = $this->generateNextIndexingTime($cfgRec);

				// Start process by updating index-config record:
				$field_array = array(
					'set_id' => $setId,
					'timer_next_indexing' => $nextTime,
					'session_data' => '',
				);
				$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config','uid='.intval($cfgRec['uid']), $field_array);

				// Build the parameters of the first queue entry based on configuration type.
				// $params stays NULL when nothing is to be queued.
				$params = NULL;
				switch($cfgRec['type'])	{
					case 1:	// RECORDS:
						$params = array(
							'indexConfigUid' => $cfgRec['uid'],
							'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),
							'url' => 'Records (start)',	// Just for show.
						);
					break;
					case 2:	// FILES:
						$params = array(
							'indexConfigUid' => $cfgRec['uid'],		// General
							'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
							'url' => $cfgRec['filepath'],	// Partly general... (for URL and file types)
							'depth' => 0	// Specific for URL and file types
						);
					break;
					case 3:	// External URL:
						$params = array(
							'indexConfigUid' => $cfgRec['uid'],		// General
							'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
							'url' => $cfgRec['externalUrl'],	// Partly general... (for URL and file types)
							'depth' => 0	// Specific for URL and file types
						);
					break;
					case 4:	// Page tree
						$params = array(
							'indexConfigUid' => $cfgRec['uid'],		// General
							'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),	// General
							'url' => $cfgRec['alternative_source_pid'],	// Partly general... (for URL and file types and page tree (root))
							'depth' => 0	// Specific for URL and file types and page tree
						);
					break;
					case 5:	// Meta configuration, nothing to do:
						# NOOP
					break;
					default:
						// Custom type: delegate to a hook object registered for this type number.
						if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]))	{
							$hookObj = &t3lib_div::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);

							if (is_object($hookObj))	{
								// NOTE(review): $message is never assigned in this method; presumably initMessage() takes it by reference - confirm against the hook implementations.
								$params = array(
									'indexConfigUid' => $cfgRec['uid'],		// General
									'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].'/CUSTOM]'),	// General
									'url' => $hookObj->initMessage($message),
								);
							}
						}
					break;
				}

				if (is_array($params))	{
					$pObj->addQueueEntry_callBack($setId,$params,$this->callBack,$cfgRec['pid']);
				}
			}
		}

		// Finally, look up all old index configurations which are finished and needs to be reset and done.
		$this->cleanUpOldRunningConfigurations();
	}

	/**
	 * Processes one queue entry by dispatching to the handler of the configuration type.
	 *
	 * The configuration's "session_data" is unserialized before and written back
	 * (serialized) after the handler has run.
	 *
	 * @param	array		Queue entry parameters; "indexConfigUid" must be set for anything to happen
	 * @param	object		Parent object
	 * @return	array		array('log' => $params)
	 */
	function crawler_execute($params,&$pObj)	{

		// Indexer configuration ID must exist:
		if ($params['indexConfigUid'])	{

			// Load the indexing configuration record:
			$rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				'index_config',
				'uid='.intval($params['indexConfigUid'])
			);
			$cfgRec = (is_array($rows) && isset($rows[0])) ? $rows[0] : NULL;

			if (is_array($cfgRec))	{

				// Unpack session data (written by this class itself, see below):
				$session_data = unserialize($cfgRec['session_data']);

				// Select which type:
				switch($cfgRec['type'])	{
					case 1:	// Records:
						$this->crawler_execute_type1($cfgRec,$session_data,$params,$pObj);
					break;
					case 2:	// Files
						$this->crawler_execute_type2($cfgRec,$session_data,$params,$pObj);
					break;
					case 3:	// External URL:
						$this->crawler_execute_type3($cfgRec,$session_data,$params,$pObj);
					break;
					case 4:	// Page tree:
						$this->crawler_execute_type4($cfgRec,$session_data,$params,$pObj);
					break;
					case 5:	// Meta
						# NOOP (should never enter here!)
					break;
					default:
						if (!empty($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]))	{
							$hookObj = &t3lib_div::getUserObj($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['crawler'][$cfgRec['type']]);

							if (is_object($hookObj))	{
								$this->pObj = &$pObj;	// For addQueueEntryForHook()
								$hookObj->indexOperation($cfgRec,$session_data,$params,$this);
							}
						}
					break;
				}

				// Save process data which might be modified:
				$field_array = array(
					'session_data' => serialize($session_data)
				);
				$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config','uid='.intval($cfgRec['uid']), $field_array);
			}
		}

		return array('log' => $params);
	}

	/**
	 * Type 1 (records): indexes the next batch of records from the configured table.
	 *
	 * Records are read in uid order from the source page, starting after the uid stored
	 * in $session_data['uid']. If any were found, a new queue entry is added for the next batch.
	 *
	 * @param	array		Indexing configuration record
	 * @param	array		Session data for the run, passed by reference ("uid" = last indexed uid)
	 * @param	array		Queue entry parameters (not read here)
	 * @param	object		Parent object; must provide addQueueEntry_callBack()
	 * @return	void
	 */
	function crawler_execute_type1($cfgRec,&$session_data,$params,&$pObj)	{
		if ($cfgRec['table2index'] && isset($GLOBALS['TCA'][$cfgRec['table2index']]))	{

			// Init session data array if not already:
			if (!is_array($session_data))	{
				$session_data = array(
					'uid' => 0
				);
			}

			// Init:
			$pid = intval($cfgRec['alternative_source_pid']) ? intval($cfgRec['alternative_source_pid']) : $cfgRec['pid'];
			$numberOfRecords = $cfgRec['recordsbatch'] ? t3lib_div::intInRange($cfgRec['recordsbatch'],1) : 100;

			// Get root line:
			$rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);

			// Select
			$recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'*',
				$cfgRec['table2index'],
				'pid = '.intval($pid).'
					AND uid > '.intval($session_data['uid']).
					t3lib_BEfunc::deleteClause($cfgRec['table2index']),
				'',
				'uid',
				$numberOfRecords
			);

			// Traverse:
			if (is_array($recs) && count($recs))	{
				foreach($recs as $r)	{

					// Index single record:
					$this->indexSingleRecord($r,$cfgRec,$rl);

					// Update the UID we last processed:
					$session_data['uid'] = $r['uid'];
				}

				// Finally, set entry for next indexing of batch of records:
				$nparams = array(
					'indexConfigUid' => $cfgRec['uid'],
					'url' => 'Records from UID#'.($session_data['uid']+1).'-?',
					'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']')
				);
				$pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid']);
			}
		}
	}

	/**
	 * Type 2 (files): indexes a file, or queues the content of a directory.
	 *
	 * If $params['url'] points to a file it is indexed. If it points to a directory,
	 * the files in it (filtered by the configured extensions) and - while the depth limit
	 * is not reached - its sub-directories are added as new queue entries.
	 *
	 * @param	array		Indexing configuration record
	 * @param	array		Session data for the run, passed by reference (not modified here)
	 * @param	array		Queue entry parameters: "url" (path) and "depth"
	 * @param	object		Parent object; must provide addQueueEntry_callBack()
	 * @return	void
	 */
	function crawler_execute_type2($cfgRec,&$session_data,$params,&$pObj)	{

		// Prepare path, making it absolute and checking:
		$readpath = $params['url'];
		if (!t3lib_div::isAbsPath($readpath))	{
			$readpath = t3lib_div::getFileAbsFileName($readpath);
		}

		if (t3lib_div::isAllowedAbsPath($readpath))	{
			if (@is_file($readpath))	{	// If file, index it!

				// Get root line (need to provide this when indexing external files)
				$rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);

				// Load indexer if not yet.
				$this->loadIndexerClass();

				// (Re)-Indexing file on page.
				$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, 0, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

				// Index document:
				$indexerObj->indexRegularDocument(substr($readpath,strlen(PATH_site)), TRUE);
			} elseif (@is_dir($readpath))	{	// If dir, read content and create new pending items for log:

				// Select files and directories in path:
				$extList = implode(',',t3lib_div::trimExplode(',',$cfgRec['extensions'],1));
				$fileArr = array();
				$files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,0);

				$directoryList = t3lib_div::get_dirs($readpath);
				if (is_array($directoryList) && $params['depth'] < $cfgRec['depth'])	{
					// Make sure exactly one slash separates the directory from the sub-directory name:
					$basePath = rtrim($readpath,'/').'/';
					foreach ($directoryList as $subdir)	{
						if ((string)$subdir!='')	{
							$files[]= $basePath.$subdir.'/';
						}
					}
				}
				$files = t3lib_div::removePrefixPathFromList($files,PATH_site);

				// traverse the items and create log entries (skipped if no list came back):
				if (is_array($files))	{
					foreach($files as $path)	{
						$this->instanceCounter++;
						if ($path!==$params['url'])	{
							// Parameters:
							$nparams = array(
								'indexConfigUid' => $cfgRec['uid'],
								'url' => $path,
								'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),
								'depth' => $params['depth']+1
							);
							// Spread the entries over time: one every $secondsPerExternalUrl seconds.
							$pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid'],time()+$this->instanceCounter*$this->secondsPerExternalUrl);
						}
					}
				}
			}
		}
	}

	/**
	 * Type 3 (external URL): indexes the URL and queues the links found on it.
	 *
	 * Links are only queued while the depth limit is not reached, and only if they pass
	 * checkUrl() and are not matched by the configuration's "url_deny" list.
	 *
	 * @param	array		Indexing configuration record
	 * @param	array		Session data for the run, passed by reference ("urlLog" = URLs already queued)
	 * @param	array		Queue entry parameters: "url" and "depth"
	 * @param	object		Parent object; must provide addQueueEntry_callBack()
	 * @return	void
	 */
	function crawler_execute_type3($cfgRec,&$session_data,$params,&$pObj)	{

		// Init session data array if not already:
		if (!is_array($session_data))	{
			$session_data = array(
				'urlLog' => array($params['url'])
			);
		}

		// Index the URL:
		$rl = $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
		$subUrls = $this->indexExtUrl($params['url'], $cfgRec['pid'], $rl, $cfgRec['uid'], $cfgRec['set_id']);

		// Add more elements to log now:
		if ($params['depth'] < $cfgRec['depth'])	{
			foreach($subUrls as $url)	{
				if ($url = $this->checkUrl($url,$session_data['urlLog'],$cfgRec['externalUrl']))	{
					if (!$this->checkDeniedSuburls($url, $cfgRec['url_deny']))	{
						$this->instanceCounter++;
						$session_data['urlLog'][] = $url;

						// Parameters:
						$nparams = array(
							'indexConfigUid' => $cfgRec['uid'],
							'url' => $url,
							'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),
							'depth' => $params['depth']+1
						);
						$pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid'],time()+$this->instanceCounter*$this->secondsPerExternalUrl);
					}
				}
			}
		}
	}

	/**
	 * Type 4 (page tree): submits the URLs of a page and queues its subpages.
	 *
	 * @param	array		Indexing configuration record
	 * @param	array		Session data for the run, passed by reference ("urlLog" gets one entry per queued subpage)
	 * @param	array		Queue entry parameters: "url" (holds the page uid) and "depth"
	 * @param	object		Parent object; must provide getUrlsForPageRow(), urlListFromUrlArray() and addQueueEntry_callBack()
	 * @return	void
	 */
	function crawler_execute_type4($cfgRec,&$session_data,$params,&$pObj)	{

		// Base page uid:
		$pageUid = intval($params['url']);

		// Get array of URLs from page:
		$pageRow = t3lib_BEfunc::getRecord('pages',$pageUid);
		$res = $pObj->getUrlsForPageRow($pageRow);

		$duplicateTrack = array();	// Registry for duplicates
		$downloadUrls = array();	// Dummy.

		// Submit URLs (the return value of urlListFromUrlArray() is not needed here):
		if (is_array($res) && count($res))	{
			foreach($res as $vv)	{
				$pObj->urlListFromUrlArray($vv,$pageRow,time(),30,1,0,$duplicateTrack,$downloadUrls,array('tx_indexedsearch_reindex'));
			}
		}

		// Add subpages to log now:
		if ($params['depth'] < $cfgRec['depth'])	{

			// Subpages selected
			$recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'uid,title',
				'pages',
				'pid = '.intval($pageUid).
					t3lib_BEfunc::deleteClause('pages')
			);

			// Traverse subpages and add to queue:
			if (is_array($recs) && count($recs))	{

				// Session data is not an array on the first call of a run; make it one before appending:
				if (!is_array($session_data))	{
					$session_data = array();
				}

				foreach($recs as $r)	{
					$this->instanceCounter++;
					$url = 'pages:'.$r['uid'].': '.$r['title'];
					$session_data['urlLog'][] = $url;

					// Parameters:
					$nparams = array(
						'indexConfigUid' => $cfgRec['uid'],
						'url' => $r['uid'],
						'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']'),
						'depth' => $params['depth']+1
					);
					$pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid'],time()+$this->instanceCounter*$this->secondsPerExternalUrl);
				}
			}
		}
	}

	/**
	 * Finishes running indexing configurations which have no pending queue entries left.
	 *
	 * For such a configuration all index rows that belong to it but carry another set id
	 * (i.e. were not refreshed during this run) are deleted, and the configuration's
	 * "set_id" and "session_data" are reset.
	 *
	 * @return	void
	 */
	function cleanUpOldRunningConfigurations()	{

		// Lookup running index configurations:
		$runningIndexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
			'uid,set_id',
			'index_config',
			'set_id!=0'.t3lib_BEfunc::deleteClause('index_config')
		);

		if (!is_array($runningIndexingConfigurations))	{
			return;
		}

		// Tables holding rows keyed by phash (list copied from class.tx_indexedsearch_modfunc1.php)
		$tableArr = explode(',','index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug');

		// For each running configuration, look up how many log entries there are which are scheduled for execution and if none, clear the "set_id" (means; Processing was DONE)
		foreach($runningIndexingConfigurations as $cfgRec)	{

			// Look for ended processes:
			$countRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
				'count(*) AS count',
				'tx_crawler_queue',
				'set_id='.intval($cfgRec['set_id']).' AND exec_time=0'
			);
			$queuedCount = (is_array($countRows) && isset($countRows[0]['count'])) ? $countRows[0]['count'] : 0;

			if (!$queuedCount)	{

				// Lookup old phash rows:
				$oldPhashRows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'phash',
					'index_phash',
					'freeIndexUid='.intval($cfgRec['uid']).' AND freeIndexSetId!='.intval($cfgRec['set_id'])
				);

				if (is_array($oldPhashRows))	{
					foreach($oldPhashRows as $pHashRow)	{
						// Removing old registrations for all tables
						foreach($tableArr as $table)	{
							$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($pHashRow['phash']));
						}
					}
				}

				// End process by updating index-config record:
				$field_array = array(
					'set_id' => 0,
					'session_data' => '',
				);
				$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_config','uid='.intval($cfgRec['uid']), $field_array);
			}
		}
	}







	/*****************************************
	 *
	 * Helper functions
	 *
	 *****************************************/

	/**
	 * Checks whether a URL found on an indexed page should be queued.
	 *
	 * A trailing "//" is reduced to "/" and any "#fragment" is cut off. The URL is accepted
	 * when it contains no "../", starts with $baseUrl and is not yet in $urlLog.
	 *
	 * @param	string		URL to check
	 * @param	array		URLs already queued
	 * @param	string		Base URL the URL must start with
	 * @return	string		The cleaned URL if accepted; otherwise nothing (NULL) is returned
	 */
	function checkUrl($url,$urlLog,$baseUrl)	{
		// "D" makes "$" match only at the very end of the string.
		$url = preg_replace('#//$#D','/',$url);
		list($url) = explode('#',$url);

		if (!strstr($url,'../'))	{
			if (t3lib_div::isFirstPartOfStr($url,$baseUrl))	{
				if (!in_array($url,$urlLog))	{
					return $url;
				}
			}
		}
	}

	/**
	 * Indexes an external URL and returns the links found in its content.
	 *
	 * Links without a scheme are prefixed with scheme and host of $url.
	 *
	 * @param	string		URL to index
	 * @param	integer		Page id passed to the indexer
	 * @param	array		Rootline uids passed to the indexer
	 * @param	integer		Uid of the indexing configuration
	 * @param	integer		Set id of the current run
	 * @return	array		List of URLs found in the fetched content
	 */
	function indexExtUrl($url, $pageId, $rl, $cfgUid, $setId)	{

		// Load indexer if not yet.
		$this->loadIndexerClass();

		// Index external URL:
		$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
		$indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
		$indexerObj->backend_setFreeIndexUid($cfgUid, $setId);
		$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

		$indexerObj->indexExternalUrl($url);
		$url_qParts = parse_url($url);

		// Get URLs on this page:
		$subUrls = array();
		$list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);

		// Traverse links:
		foreach($list as $linkInfo)	{

			// Decode entities:
			$subUrl = t3lib_div::htmlspecialchars_decode($linkInfo['href']);

			// parse_url() may return FALSE or omit "scheme"; both mean "no scheme" here.
			$qParts = parse_url($subUrl);
			if (!is_array($qParts) || empty($qParts['scheme']))	{
				$subUrl = $url_qParts['scheme'].'://'.$url_qParts['host'].'/'.t3lib_div::resolveBackPath($subUrl);
			}

			$subUrls[] = $subUrl;
		}

		return $subUrls;
	}

	/**
	 * Indexes a single record as if it was a page.
	 *
	 * The first field in the configuration's "fieldlist" is used as title, the remaining
	 * fields are concatenated into the content. Tags are stripped from both.
	 *
	 * @param	array		Record to index
	 * @param	array		Indexing configuration record
	 * @param	array		Rootline uids; looked up from the configuration's pid if not an array
	 * @return	void
	 */
	function indexSingleRecord($r,$cfgRec,$rl=NULL)	{

		// Load indexer if not yet.
		$this->loadIndexerClass();


		// Init:
		$rl = is_array($rl) ? $rl : $this->getUidRootLineForClosestTemplate($cfgRec['pid']);
		$fieldList = t3lib_div::trimExplode(',',$cfgRec['fieldlist'],1);
		$languageField = $GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['languageField'];
		$sys_language_uid = $languageField ? $r[$languageField] : 0;

		// (Re)-Indexing a row from a table:
		$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
		parse_str(str_replace('###UID###',$r['uid'],$cfgRec['get_params']),$GETparams);
		$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, $cfgRec['chashcalc'] ? TRUE : FALSE);
		$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
		$indexerObj->forceIndexing = TRUE;

		// Both start empty so an empty field list yields an empty title instead of an undefined variable:
		$theTitle = '';
		$theContent = '';
		foreach($fieldList as $k => $v)	{
			if (!$k)	{
				$theTitle = $r[$v];
			} else {
				$theContent.= $r[$v].' ';
			}
		}

		// Indexing the record as a page (but with parameters set, see ->backend_setFreeIndexUid())
		$indexerObj->backend_indexAsTYPO3Page(
			strip_tags($theTitle),
			'',
			'',
			strip_tags($theContent),
			$GLOBALS['LANG']->charSet,	// Requires that $GLOBALS['LANG'] is initialized (done at the top of this file)
			$r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['tstamp']],
			$r[$GLOBALS['TCA'][$cfgRec['table2index']]['ctrl']['crdate']],
			$r['uid']
		);
	}

	/**
	 * Includes the indexer class file of the indexed_search extension.
	 *
	 * @return	void
	 */
	function loadIndexerClass()	{
		// The included file is evaluated in this function's scope; presumably it reads $TYPO3_CONF_VARS, hence the global declaration.
		global $TYPO3_CONF_VARS;
		require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
	}

	/**
	 * Returns the uids of the rootline that the template parser produces for a page.
	 *
	 * @param	integer		Page id
	 * @return	array		Rootline uids, keyed like the template object's rootLine
	 */
	function getUidRootLineForClosestTemplate($id)	{
		// Made visible to the files included below, which are evaluated in this function's scope.
		global $TYPO3_CONF_VARS;

		require_once (PATH_t3lib."class.t3lib_page.php");
		require_once (PATH_t3lib."class.t3lib_tstemplate.php");
		require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");

		$tmpl = t3lib_div::makeInstance("t3lib_tsparser_ext");
		$tmpl->tt_track = 0;	// Do not log time-performance information
		$tmpl->init();

		// Gets the rootLine
		$sys_page = t3lib_div::makeInstance("t3lib_pageSelect");
		$rootLine = $sys_page->getRootLine($id);
		$tmpl->runThroughTemplates($rootLine,0);	// This generates the constants/config + hierarchy info for the template.

		// Root line uids
		$rootline_uids = array();
		foreach($tmpl->rootLine as $rlkey => $rldat)	{
			$rootline_uids[$rlkey] = $rldat['uid'];
		}

		return $rootline_uids;
	}

	/**
	 * Calculates the next point in time an indexing configuration should run.
	 *
	 * The result is "midnight + timer_offset" plus the smallest whole number of
	 * "timer_frequency" intervals needed to reach the current time or later.
	 *
	 * @param	array		Indexing configuration record ("timer_frequency", "timer_offset", "timer_next_indexing")
	 * @return	integer		UNIX timestamp of the next run
	 */
	function generateNextIndexingTime($cfgRec)	{
		$currentTime = time();

		// Now, find a midnight time to use for offset calculation. This has to differ depending on whether we have
		// frequencies within a day or more than a day; Less than a day, we don't care which day to use for offset,
		// more than a day we want to respect the currently entered day as offset regardless of when the script is
		// run - thus the day-of-week used in case "Weekly" is selected will be respected
		if ($cfgRec['timer_frequency']<=24*3600)	{
			$aMidNight = mktime(0,0,0)-1*24*3600;
		} else {
			$lastTime = $cfgRec['timer_next_indexing'] ? intval($cfgRec['timer_next_indexing']) : $currentTime;
			// Four-digit year, so mktime() does not have to guess the century:
			$aMidNight = mktime(0,0,0, intval(date('m',$lastTime)), intval(date('d',$lastTime)), intval(date('Y',$lastTime)));
		}

		// Find last offset time plus frequency in seconds:
		$lastSureOffset = $aMidNight+t3lib_div::intInRange($cfgRec['timer_offset'],0,86400);
		$frequencySeconds = t3lib_div::intInRange($cfgRec['timer_frequency'],1);

		// Now, find out how many blocks of the length of frequency there is until the next time:
		$frequencyBlocksUntilNextTime = ceil(($currentTime-$lastSureOffset)/$frequencySeconds);

		// Set next time to the offset + the frequencyblocks multiplied with the frequency length in seconds.
		$nextTime = $lastSureOffset + $frequencyBlocksUntilNextTime*$frequencySeconds;

		return $nextTime;
	}

	/**
	 * Checks if a URL starts with one of the denied URL prefixes.
	 *
	 * @param	string		URL to test
	 * @param	string		Denied URL prefixes, one per line
	 * @return	boolean		TRUE if the URL is denied, otherwise FALSE
	 */
	function checkDeniedSuburls($url, $url_deny)	{
		if (trim($url_deny))	{
			$url_denyArray = t3lib_div::trimExplode(chr(10),$url_deny,1);
			foreach($url_denyArray as $testurl)	{
				if (t3lib_div::isFirstPartOfStr($url,$testurl))	{
					return TRUE;
				}
			}
		}
		return FALSE;
	}

	/**
	 * Adds a queue entry on behalf of a custom-type hook object.
	 *
	 * Relies on $this->pObj, which crawler_execute() sets before calling the hook.
	 *
	 * @param	array		Indexing configuration record
	 * @param	string		Title/message stored as "url" of the queue entry
	 * @return	void
	 */
	function addQueueEntryForHook($cfgRec, $title)	{

		$nparams = array(
			'indexConfigUid' => $cfgRec['uid'],		// This must ALWAYS be the cfgRec uid!
			'url' => $title,
			'procInstructions' => array('[Index Cfg UID#'.$cfgRec['uid'].']')	// Also just for information. Its good style to show that its an indexing configuration that added the entry.
		);
		$this->pObj->addQueueEntry_callBack($cfgRec['set_id'],$nparams,$this->callBack,$cfgRec['pid']);
	}







	/*************************
	 *
	 * Hook functions for TCEmain (indexing of records)
	 *
	 *************************/

	/**
	 * Re-indexes a record right after it was created or changed, if an indexing
	 * configuration asks for that ("records_indexonchange").
	 *
	 * @param	string		Status of the operation; "new" means $id is a placeholder to be translated
	 * @param	string		Table name
	 * @param	mixed		Record uid (or placeholder id for new records)
	 * @param	array		Fields that were written; nothing happens if empty
	 * @param	object		Calling object; its substNEWwithIDs array is read for new records
	 * @return	void
	 */
	function processDatamap_afterDatabaseOperations($status, $table, $id, $fieldArray, &$pObj)	{

		// Check if any fields are actually updated:
		if (count($fieldArray))	{

			// Translate new ids.
			if ($status=='new')	{
				$id = $pObj->substNEWwithIDs[$id];
			}

			// Get full record and if exists, search for indexing configurations:
			$currentRecord = t3lib_BEfunc::getRecord($table,$id);
			if (is_array($currentRecord))	{

				// Select all (not running) indexing configurations of type "record" (1) and which points to this table and is located on the same page as the record or pointing to the right source PID
				$indexingConfigurations = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'*',
					'index_config',
					'hidden=0
						AND (starttime=0 OR starttime<='.time().')
						AND set_id=0
						AND type=1
						AND table2index='.$GLOBALS['TYPO3_DB']->fullQuoteStr($table,'index_config').'
						AND (
								(alternative_source_pid='.$GLOBALS['TYPO3_DB']->fullQuoteStr('','index_config').' AND pid='.intval($currentRecord['pid']).')
								OR (alternative_source_pid='.$GLOBALS['TYPO3_DB']->fullQuoteStr($currentRecord['pid'],'index_config').')
							)
						AND records_indexonchange=1
						'.t3lib_BEfunc::deleteClause('index_config')
				);

				if (is_array($indexingConfigurations))	{
					foreach($indexingConfigurations as $cfgRec)	{
						$this->indexSingleRecord($currentRecord,$cfgRec);
					}
				}
			}
		}
	}
}


/**
 * Crawler callback class for indexing a single document.
 */
class tx_indexedsearch_files {

	/**
	 * Indexes the document described by the queue entry parameters.
	 *
	 * @param	array		Queue entry parameters: "conf" (indexer configuration array), "document" and optionally "alturl"
	 * @param	object		Parent object (not used)
	 * @return	array		array('content' => array()) when "conf" is an array; otherwise nothing is returned
	 */
	function crawler_execute($params,&$pObj)	{

		// Load indexer if not yet.
		$this->loadIndexerClass();

		if (is_array($params['conf']))	{

			// Initialize the indexer class:
			$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
			$indexerObj->conf = $params['conf'];
			$indexerObj->init();

			// Index document:
			if ($params['alturl'])	{
				// pathinfo() leaves out "extension" for names without one:
				$fI = pathinfo($params['document']);
				$ext = isset($fI['extension']) ? strtolower($fI['extension']) : '';
				$indexerObj->indexRegularDocument($params['alturl'], TRUE, $params['document'], $ext);
			} else {
				$indexerObj->indexRegularDocument($params['document'], TRUE);
			}

			// Return OK:
			return array('content' => array());
		}
	}

	/**
	 * Includes the indexer class file of the indexed_search extension.
	 *
	 * @return	void
	 */
	function loadIndexerClass()	{
		// The included file is evaluated in this function's scope; presumably it reads $TYPO3_CONF_VARS, hence the global declaration.
		global $TYPO3_CONF_VARS;
		require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
	}
}


if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.crawler.php'])	{
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/class.crawler.php']);
}
?>