Documentation TYPO3 par Ameos |
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

/**
 * Example crawler hook for the indexed_search extension.
 *
 * Demonstrates three ways of feeding content to the indexer, one per call
 * of indexOperation():
 *   step 1 - records that are viewed on a page through extra GET parameters,
 *   step 2 - a file in the local file system,
 *   step 3 - an external URL.
 *
 * Which step is executed is tracked in the session data array that the
 * caller passes in by reference.
 */
class tx_indexedsearch_crawlerhook {

	/**
	 * Returns the text announcing the start of an indexing session.
	 * (Presumably used by the crawler as the title of the first queue
	 * entry - confirm against the calling code.)
	 *
	 * @return	string		Message text
	 */
	function initMessage() {
		return 'Start of Custom Example Indexing session!';
	}

	/**
	 * Performs one step of the example indexing session and, while steps
	 * remain, queues an entry for the next call.
	 *
	 * Every call increments $session_data['step'] and runs only the case
	 * matching the new value. Because of that, each case has to set up all
	 * local variables it needs by itself; nothing carries over from an
	 * earlier step except what is stored in $session_data.
	 *
	 * @param	array		Indexing configuration record; the keys "pid", "uid" and "set_id" are read here.
	 * @param	array		Session data, passed by reference. Initialized to array('step' => 0) when it is not an array yet.
	 * @param	array		Parameters from the caller (not used by this example).
	 * @param	object		Parent object, passed by reference. Must offer loadIndexerClass(), getUidRootLineForClosestTemplate() and addQueueEntryForHook().
	 * @return	void
	 */
	function indexOperation($cfgRec,&$session_data,$params,&$pObj) {

			// Init session data array if not already:
		if (!is_array($session_data)) {
			$session_data = array(
				'step' => 0
			);
		}

			// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing are left)
		$session_data['step']++;

		switch((int)$session_data['step']) {
			case 1:	// Indexing Example: Content accessed with GET parameters added to URL:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Set up 2 example items to index:
				$exampleItems = array(
					array(
						'ID' => '123',
						'title' => 'Title of Example 1',
						'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
					),
					array(
						'ID' => 'example2',
						'title' => 'Title of Example 2',
						'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
					)
				);

					// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)
				foreach($exampleItems as $item) {

						// Prepare the GET variables array that must be added to the page URL in order to view result:
					parse_str('&itemID='.rawurlencode($item['ID']), $GETparams);

						// The example items carry no timestamps, so fall back to 0 instead of reading undefined array keys.
						// Real implementations should put the record's modification/creation time into "tstamp"/"create_date".
					$mtime = isset($item['tstamp']) ? $item['tstamp'] : 0;
					$crdate = isset($item['create_date']) ? $item['create_date'] : 0;

						// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
					$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
					$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
					$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
					$indexerObj->forceIndexing = TRUE;

						// Indexing the content of the item (see tx_indexedsearch_indexer::backend_indexAsTYPO3Page() for options)
					$indexerObj->backend_indexAsTYPO3Page(
						$item['title'],
						'',
						'',
						$item['content'],
						$GLOBALS['LANG']->charSet,	// Charset of content - MUST be set.
						$mtime,						// Last-modified date
						$crdate,					// Created date
						$item['ID']
					);
				}
			break;
			case 2:	// Indexing Example: Content accessed directly in file system:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
				$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index document:
				$indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE);
			break;
			case 3:	// Indexing Example: Content accessed on External URLs:

					// Load indexer if not yet.
				$pObj->loadIndexerClass();

					// Rootline and language uid must be set up here as well: this step runs in its own call,
					// so the values assigned in steps 1 and 2 do not exist anymore.
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
				$sys_language_uid = 0;

					// Index external URL:
				$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index external URL (HTML only):
				$indexerObj->indexExternalUrl('http://www.google.com/');
			break;
		}

			// Finally, set entry for next indexing instance (if all steps are not completed)
			// NOTE(review): with "<= 3" an entry is also queued after step 3 has run; the call that follows
			// matches no case above and queues nothing more. Confirm whether that extra pass is intended.
		if ($session_data['step']<=3) {
			$title = 'Step #'.$session_data['step'].' of 3';	// Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.
			$pObj->addQueueEntryForHook($cfgRec, $title);
		}
	}
}


	// Include an extension class (XCLASS) for this file if one is configured:
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php']) {
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php']);
}
?>