"TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/example/class.crawlerhook.php Source File", "datetime" => "Sat Dec 2 19:22:32 2006", "date" => "2 Dec 2006", "doxygenversion" => "1.4.6", "projectname" => "TYPO3 4.0.1", "projectnumber" => "4.0.1" ); get_header($doxygen_vars); ?>

class.crawlerhook.php

00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
/**
 * Example crawler hook for custom indexing with "indexed_search".
 *
 * The hook walks through three demonstration steps, one per invocation:
 *   1) index records that are displayed on a TYPO3 page via GET parameters,
 *   2) index a file from the local file system,
 *   3) index an external URL.
 * Progress between invocations is tracked in the session data array (key "step").
 */
class tx_indexedsearch_crawlerhook {

	/**
	 * Returns the fixed start message of the example indexing session.
	 * NOTE(review): presumably used as the label of the first crawler queue entry - confirm against the caller.
	 *
	 * @return	string		Message text
	 */
	function initMessage()	{
		return 'Start of Custom Example Indexing session!';
	}

	/**
	 * Performs one step of the example indexing session and queues the next invocation.
	 *
	 * @param	array		Indexing Configuration Record; the keys "pid", "uid" and "set_id" are read here.
	 * @param	array		Session data, passed by reference. Initialized to array('step' => 0) if it is not an array; "step" is increased by one on every call.
	 * @param	array		Parameters from the caller (not used by this example).
	 * @param	object		Parent object, passed by reference. Must offer loadIndexerClass(), getUidRootLineForClosestTemplate() and addQueueEntryForHook().
	 * @return	void
	 */
	function indexOperation($cfgRec,&$session_data,$params,&$pObj)	{

			// Init session data array if not already:
		if (!is_array($session_data))	{
			$session_data = array(
				'step' => 0
			);
		}

			// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)
		$session_data['step']++;

		switch((int)$session_data['step'])	{
			case 1:		// Indexing Example: Content accessed with GET parameters added to URL:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Set up 2 example items to index:
				$exampleItems = array(
					array(
						'ID' => '123',
						'title' => 'Title of Example 1',
						'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
					),
					array(
						'ID' => 'example2',
						'title' => 'Title of Example 2',
						'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
					)
				);

					// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)
				foreach($exampleItems as $item)	{

						// Prepare the GET variables array that must be added to the page URL in order to view result:
					parse_str('&itemID='.rawurlencode($item['ID']), $GETparams);

						// The example items above carry no timestamps; fall back to 0 explicitly instead of reading undefined array keys:
					$itemMtime = isset($item['tstamp']) ? (int)$item['tstamp'] : 0;
					$itemCrdate = isset($item['create_date']) ? (int)$item['create_date'] : 0;

						// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
					$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
					$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
					$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
					$indexerObj->forceIndexing = TRUE;

						// Indexing the content of the item (see tx_indexedsearch_indexer::backend_indexAsTYPO3Page() for options)
					$indexerObj->backend_indexAsTYPO3Page(
						$item['title'],
						'',
						'',
						$item['content'],
						$GLOBALS['LANG']->charSet,	// Charset of content - MUST be set.
						$itemMtime,					// Last-modified date
						$itemCrdate,				// Created date
						$item['ID']
					);
				}
			break;
			case 2:		// Indexing Example: Content accessed directly in file system:

					// Load indexer if not yet [DON'T CHANGE]:
				$pObj->loadIndexerClass();

					// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);

					// Set up language uid, if any:
				$sys_language_uid = 0;

					// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:
				$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index document:
				$indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE);
			break;
			case 3:		// Indexing Example: Content accessed on External URLs:

					// Load indexer if not yet.
				$pObj->loadIndexerClass();

					// Every step runs in its own invocation, so the rootline and language uid from earlier steps are not available here and must be set up again:
				$rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']);
				$sys_language_uid = 0;

					// Prepare indexer:
				$indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
				$indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
				$indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
				$indexerObj->hash['phash'] = -1;	// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)

					// Index external URL (HTML only):
				$indexerObj->indexExternalUrl('http://www.google.com/');
			break;
		}

			// Finally, set entry for next indexing instance (if all steps are not completed)
			// NOTE(review): with "<=" an entry is also queued after step 3, so a fourth invocation runs without doing any work, and each entry is labelled with the step just finished. Kept as-is; confirm whether this is intended.
		if ($session_data['step']<=3)	{
			$title = 'Step #'.$session_data['step'].' of 3';	// Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.
			$pObj->addQueueEntryForHook($cfgRec, $title);
		}
	}
}
00187 
00188 
00189 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php'])        {
00190         include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php']);
00191 }
00192 ?>