<?php include_once '../doc-typo3-funcs.php'; $doxygen_vars = array( "title" => "TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/example/class.crawlerhook.php Source File", "datetime" => "Sat Dec 2 19:22:32 2006", "date" => "2 Dec 2006", "doxygenversion" => "1.4.6", "projectname" => "TYPO3 4.0.1", "projectnumber" => "4.0.1" ); get_header($doxygen_vars); ?> <!-- Generated by Doxygen 1.4.6 --> <div class="tabs"> <ul> <li><a href="main.html"><span>Main Page</span></a></li> <li><a href="namespaces.html"><span>Namespaces</span></a></li> <li><a href="classes.html"><span>Classes</span></a></li> <li id="current"><a href="files.html"><span>Files</span></a></li> <li><a href="dirs.html"><span>Directories</span></a></li> <li><a href="pages.html"><span>Related Pages</span></a></li> <li><a href="examples.html"><span>Examples</span></a></li> <li> <form action="search.php" method="get"> <table cellspacing="0" cellpadding="0" border="0"> <tr> <td><label> <u>S</u>earch for </label></td> <td><input type="text" name="query" value="" size="20" accesskey="s"/></td> </tr> </table> </form> </li> </ul></div> <div class="nav"> <a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a> » <a class="el" href="dir_18071ae4545d8b3e0364d30c0659c74a.html">typo3</a> » <a class="el" href="dir_57bf1ed8249c1fd5b014486d01bcb27a.html">sysext</a> » <a class="el" href="dir_1144f7dd65e866e7cd4aa66020137172.html">indexed_search</a> » <a class="el" href="dir_4be8f00cfb265b72fa315a2e8386ce71.html">example</a></div> <h1>class.crawlerhook.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 &lt;?php <a name="l00002"></a>00002 <span class="comment">/***************************************************************</span> <a name="l00003"></a>00003 <span class="comment">* Copyright notice</span> <a name="l00004"></a>00004 <span class="comment">*</span> <a name="l00005"></a>00005 <span class="comment">* (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)</span> 
<a name="l00006"></a>00006 <span class="comment">* All rights reserved</span> <a name="l00007"></a>00007 <span class="comment">*</span> <a name="l00008"></a>00008 <span class="comment">* This script is part of the TYPO3 project. The TYPO3 project is</span> <a name="l00009"></a>00009 <span class="comment">* free software; you can redistribute it and/or modify</span> <a name="l00010"></a>00010 <span class="comment">* it under the terms of the GNU General Public License as published by</span> <a name="l00011"></a>00011 <span class="comment">* the Free Software Foundation; either version 2 of the License, or</span> <a name="l00012"></a>00012 <span class="comment">* (at your option) any later version.</span> <a name="l00013"></a>00013 <span class="comment">*</span> <a name="l00014"></a>00014 <span class="comment">* The GNU General Public License can be found at</span> <a name="l00015"></a>00015 <span class="comment">* http://www.gnu.org/copyleft/gpl.html.</span> <a name="l00016"></a>00016 <span class="comment">* A copy is found in the textfile GPL.txt and important notices to the license</span> <a name="l00017"></a>00017 <span class="comment">* from the author is found in LICENSE.txt distributed with these scripts.</span> <a name="l00018"></a>00018 <span class="comment">*</span> <a name="l00019"></a>00019 <span class="comment">*</span> <a name="l00020"></a>00020 <span class="comment">* This script is distributed in the hope that it will be useful,</span> <a name="l00021"></a>00021 <span class="comment">* but WITHOUT ANY WARRANTY; without even the implied warranty of</span> <a name="l00022"></a>00022 <span class="comment">* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the</span> <a name="l00023"></a>00023 <span class="comment">* GNU General Public License for more details.</span> <a name="l00024"></a>00024 <span class="comment">*</span> <a name="l00025"></a>00025 <span class="comment">* This copyright notice MUST APPEAR in all copies of the script!</span> <a name="l00026"></a>00026 <span class="comment">***************************************************************/</span> <a name="l00057"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html">00057</a> <span class="keyword">class </span><a class="code" href="classtx__indexedsearch__crawlerhook.html">tx_indexedsearch_crawlerhook</a> { <a name="l00058"></a>00058 <a name="l00064"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html#db629d861ef65d87aa21c72450e2b654">00064</a> function <a class="code" href="classtx__indexedsearch__crawlerhook.html#db629d861ef65d87aa21c72450e2b654">initMessage</a>() { <a name="l00065"></a>00065 <span class="keywordflow">return</span> 'Start of Custom Example Indexing session!'; <a name="l00066"></a>00066 } <a name="l00067"></a>00067 <a name="l00080"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html#73b24c6301b7fb197d1544457fcc4610">00080</a> function <a class="code" href="classtx__indexedsearch__crawlerhook.html#73b24c6301b7fb197d1544457fcc4610">indexOperation</a>($cfgRec,&$session_data,$params,&$pObj) { <a name="l00081"></a>00081 <a name="l00082"></a>00082 <span class="comment">// Init session data array if not already:</span> <a name="l00083"></a>00083 <span class="keywordflow">if</span> (!is_array($session_data)) { <a name="l00084"></a>00084 $session_data = array( <a name="l00085"></a>00085 'step' => 0 <a name="l00086"></a>00086 ); <a name="l00087"></a>00087 } <a name="l00088"></a>00088 <a name="l00089"></a>00089 <span class="comment">// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)</span> <a 
name="l00090"></a>00090 $session_data['step']++; <a name="l00091"></a>00091 <a name="l00092"></a>00092 <a name="l00093"></a>00093 <span class="keywordflow">switch</span>((<span class="keywordtype">int</span>)$session_data['step']) { <a name="l00094"></a>00094 <span class="keywordflow">case</span> 1: <span class="comment">// Indexing Example: Content accessed with GET parameters added to URL:</span> <a name="l00095"></a>00095 <a name="l00096"></a>00096 <span class="comment">// Load indexer if not yet [DON'T CHANGE]:</span> <a name="l00097"></a>00097 $pObj->loadIndexerClass(); <a name="l00098"></a>00098 <a name="l00099"></a>00099 <span class="comment">// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:</span> <a name="l00100"></a>00100 $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']); <a name="l00101"></a>00101 <a name="l00102"></a>00102 <span class="comment">// Set up language uid, if any:</span> <a name="l00103"></a>00103 $sys_language_uid = 0; <a name="l00104"></a>00104 <a name="l00105"></a>00105 <span class="comment">// Set up 2 example items to index:</span> <a name="l00106"></a>00106 $exampleItems = array( <a name="l00107"></a>00107 array( <a name="l00108"></a>00108 'ID' => '123', <a name="l00109"></a>00109 'title' => 'Title of Example 1', <a name="l00110"></a>00110 'content' => 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy' <a name="l00111"></a>00111 ), <a name="l00112"></a>00112 array( <a name="l00113"></a>00113 'ID' => 'example2', <a name="l00114"></a>00114 'title' => 'Title of Example 2', <a name="l00115"></a>00115 'content' => 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.' 
<a name="l00116"></a>00116 ) <a name="l00117"></a>00117 ); <a name="l00118"></a>00118 <a name="l00119"></a>00119 <span class="comment">// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)</span> <a name="l00120"></a>00120 foreach($exampleItems as $item) { <a name="l00121"></a>00121 <a name="l00122"></a>00122 <span class="comment">// Prepare the GET variables array that must be added to the page URL in order to view result:</span> <a name="l00123"></a>00123 parse_str('&itemID='.rawurlencode($item['ID']), $GETparams); <a name="l00124"></a>00124 <a name="l00125"></a>00125 <span class="comment">// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:</span> <a name="l00126"></a>00126 $indexerObj = &<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>'); <a name="l00127"></a>00127 $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE); <a name="l00128"></a>00128 $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']); <a name="l00129"></a>00129 $indexerObj->forceIndexing = TRUE; <a name="l00130"></a>00130 <a name="l00131"></a>00131 <span class="comment">// Indexing the content of the item (see tx_indexedsearch_indexer::backend_indexAsTYPO3Page() for options)</span> <a name="l00132"></a>00132 $indexerObj->backend_indexAsTYPO3Page( <a name="l00133"></a>00133 $item['title'], <a name="l00134"></a>00134 '', <a name="l00135"></a>00135 '', <a name="l00136"></a>00136 $item['content'], <a name="l00137"></a>00137 $GLOBALS['LANG']->charSet, <span class="comment">// Charset of content - MUST be set.</span> <a name="l00138"></a>00138 $item['tstamp'], <span class="comment">// Last-modified 
date</span> <a name="l00139"></a>00139 $item['create_date'], <span class="comment">// Created date</span> <a name="l00140"></a>00140 $item['ID'] <a name="l00141"></a>00141 ); <a name="l00142"></a>00142 } <a name="l00143"></a>00143 <span class="keywordflow">break</span>; <a name="l00144"></a>00144 <span class="keywordflow">case</span> 2: <span class="comment">// Indexing Example: Content accessed directly in file system:</span> <a name="l00145"></a>00145 <a name="l00146"></a>00146 <span class="comment">// Load indexer if not yet [DON'T CHANGE]:</span> <a name="l00147"></a>00147 $pObj->loadIndexerClass(); <a name="l00148"></a>00148 <a name="l00149"></a>00149 <span class="comment">// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:</span> <a name="l00150"></a>00150 $rl = $pObj->getUidRootLineForClosestTemplate($cfgRec['pid']); <a name="l00151"></a>00151 <a name="l00152"></a>00152 <span class="comment">// Set up language uid, if any:</span> <a name="l00153"></a>00153 $sys_language_uid = 0; <a name="l00154"></a>00154 <a name="l00155"></a>00155 <span class="comment">// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:</span> <a name="l00156"></a>00156 $indexerObj = &<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>'); <a name="l00157"></a>00157 $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl); <a name="l00158"></a>00158 $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']); <a name="l00159"></a>00159 $indexerObj->hash['phash'] = -1; <span class="comment">// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)</span> <a 
name="l00160"></a>00160 <a name="l00161"></a>00161 <span class="comment">// Index document:</span> <a name="l00162"></a>00162 $indexerObj->indexRegularDocument('fileadmin/templates/index.html', TRUE); <a name="l00163"></a>00163 <span class="keywordflow">break</span>; <a name="l00164"></a>00164 <span class="keywordflow">case</span> 3: <span class="comment">// Indexing Example: Content accessed on External URLs:</span> <a name="l00165"></a>00165 <a name="l00166"></a>00166 <span class="comment">// Load indexer if not yet.</span> <a name="l00167"></a>00167 $pObj->loadIndexerClass(); <a name="l00168"></a>00168 <a name="l00169"></a>00169 <span class="comment">// Index external URL:</span> <a name="l00170"></a>00170 $indexerObj = &<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>'); <a name="l00171"></a>00171 $indexerObj->backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl); <a name="l00172"></a>00172 $indexerObj->backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']); <a name="l00173"></a>00173 $indexerObj->hash['phash'] = -1; <span class="comment">// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)</span> <a name="l00174"></a>00174 <a name="l00175"></a>00175 <span class="comment">// Index external URL (HTML only):</span> <a name="l00176"></a>00176 $indexerObj->indexExternalUrl('http://www.google.com/'); <a name="l00177"></a>00177 <span class="keywordflow">break</span>; <a name="l00178"></a>00178 } <a name="l00179"></a>00179 <a name="l00180"></a>00180 <span class="comment">// Finally, set entry for next indexing instance (if all steps are not completed)</span> <a name="l00181"></a>00181 <span class="keywordflow">if</span> ($session_data['step']<=3) { <a name="l00182"></a>00182 $title = 'Step #'.$session_data['step'].' 
of 3'; <span class="comment">// Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.</span> <a name="l00183"></a>00183 $pObj->addQueueEntryForHook($cfgRec, $title); <a name="l00184"></a>00184 } <a name="l00185"></a>00185 } <a name="l00186"></a>00186 } <a name="l00187"></a>00187 <a name="l00188"></a>00188 <a name="l00189"></a>00189 <span class="keywordflow">if</span> (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/<span class="keyword">class</span>.crawlerhook.php']) { <a name="l00190"></a>00190 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/<span class="keyword">class</span>.crawlerhook.php']); <a name="l00191"></a>00191 } <a name="l00192"></a>00192 ?> </pre></div><?php include_once '../doc-typo3-funcs.php'; get_footer(); ?>