<?php
  // Shared helper file for these generated documentation pages;
  // get_header() is presumably defined there (not visible from this file).
  include_once '../doc-typo3-funcs.php';

  // Page metadata handed to get_header(). Keys are filled in the same order
  // as before so the resulting array is identical.
  $doxygen_vars = array();
  $doxygen_vars['title']          = 'TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/example/class.crawlerhook.php Source File';
  $doxygen_vars['datetime']       = 'Sat Dec 2 19:22:32 2006';
  $doxygen_vars['date']           = '2 Dec 2006';
  $doxygen_vars['doxygenversion'] = '1.4.6';
  $doxygen_vars['projectname']    = 'TYPO3 4.0.1';
  $doxygen_vars['projectnumber']  = '4.0.1';

  // Hand the metadata to the header helper before the static markup below.
  get_header($doxygen_vars);
?>
<!-- Generated by Doxygen 1.4.6 -->
<div class="tabs">
  <ul>
    <li><a href="main.html"><span>Main&nbsp;Page</span></a></li>
    <li><a href="namespaces.html"><span>Namespaces</span></a></li>
    <li><a href="classes.html"><span>Classes</span></a></li>
    <li id="current"><a href="files.html"><span>Files</span></a></li>
    <li><a href="dirs.html"><span>Directories</span></a></li>
    <li><a href="pages.html"><span>Related&nbsp;Pages</span></a></li>
    <li><a href="examples.html"><span>Examples</span></a></li>
    <li>
      <form action="search.php" method="get">
        <table cellspacing="0" cellpadding="0" border="0">
          <tr>
            <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
            <td><input type="text" name="query" value="" size="20" accesskey="s"/></td>
          </tr>
        </table>
      </form>
    </li>
  </ul></div>
<div class="nav">
<a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_18071ae4545d8b3e0364d30c0659c74a.html">typo3</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_57bf1ed8249c1fd5b014486d01bcb27a.html">sysext</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_1144f7dd65e866e7cd4aa66020137172.html">indexed_search</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_4be8f00cfb265b72fa315a2e8386ce71.html">example</a></div>
<h1>class.crawlerhook.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 &lt;?php
<a name="l00002"></a>00002 <span class="comment">/***************************************************************</span>
<a name="l00003"></a>00003 <span class="comment">*  Copyright notice</span>
<a name="l00004"></a>00004 <span class="comment">*</span>
<a name="l00005"></a>00005 <span class="comment">*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)</span>
<a name="l00006"></a>00006 <span class="comment">*  All rights reserved</span>
<a name="l00007"></a>00007 <span class="comment">*</span>
<a name="l00008"></a>00008 <span class="comment">*  This script is part of the TYPO3 project. The TYPO3 project is</span>
<a name="l00009"></a>00009 <span class="comment">*  free software; you can redistribute it and/or modify</span>
<a name="l00010"></a>00010 <span class="comment">*  it under the terms of the GNU General Public License as published by</span>
<a name="l00011"></a>00011 <span class="comment">*  the Free Software Foundation; either version 2 of the License, or</span>
<a name="l00012"></a>00012 <span class="comment">*  (at your option) any later version.</span>
<a name="l00013"></a>00013 <span class="comment">*</span>
<a name="l00014"></a>00014 <span class="comment">*  The GNU General Public License can be found at</span>
<a name="l00015"></a>00015 <span class="comment">*  http://www.gnu.org/copyleft/gpl.html.</span>
<a name="l00016"></a>00016 <span class="comment">*  A copy is found in the textfile GPL.txt and important notices to the license</span>
<a name="l00017"></a>00017 <span class="comment">*  from the author is found in LICENSE.txt distributed with these scripts.</span>
<a name="l00018"></a>00018 <span class="comment">*</span>
<a name="l00019"></a>00019 <span class="comment">*</span>
<a name="l00020"></a>00020 <span class="comment">*  This script is distributed in the hope that it will be useful,</span>
<a name="l00021"></a>00021 <span class="comment">*  but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<a name="l00022"></a>00022 <span class="comment">*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</span>
<a name="l00023"></a>00023 <span class="comment">*  GNU General Public License for more details.</span>
<a name="l00024"></a>00024 <span class="comment">*</span>
<a name="l00025"></a>00025 <span class="comment">*  This copyright notice MUST APPEAR in all copies of the script!</span>
<a name="l00026"></a>00026 <span class="comment">***************************************************************/</span>
<a name="l00057"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html">00057</a> <span class="keyword">class </span><a class="code" href="classtx__indexedsearch__crawlerhook.html">tx_indexedsearch_crawlerhook</a> {
<a name="l00058"></a>00058 
<a name="l00064"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html#db629d861ef65d87aa21c72450e2b654">00064</a>         function <a class="code" href="classtx__indexedsearch__crawlerhook.html#db629d861ef65d87aa21c72450e2b654">initMessage</a>()  {
<a name="l00065"></a>00065                 <span class="keywordflow">return</span> 'Start of Custom Example Indexing session!';
<a name="l00066"></a>00066         }
<a name="l00067"></a>00067 
<a name="l00080"></a><a class="code" href="classtx__indexedsearch__crawlerhook.html#73b24c6301b7fb197d1544457fcc4610">00080</a>         function <a class="code" href="classtx__indexedsearch__crawlerhook.html#73b24c6301b7fb197d1544457fcc4610">indexOperation</a>($cfgRec,&amp;$session_data,$params,&amp;$pObj)  {
<a name="l00081"></a>00081 
<a name="l00082"></a>00082                         <span class="comment">// Init session data array if not already:</span>
<a name="l00083"></a>00083                 <span class="keywordflow">if</span> (!is_array($session_data))   {
<a name="l00084"></a>00084                         $session_data = array(
<a name="l00085"></a>00085                                 'step' =&gt; 0
<a name="l00086"></a>00086                         );
<a name="l00087"></a>00087                 }
<a name="l00088"></a>00088 
<a name="l00089"></a>00089                         <span class="comment">// Increase step counter (this is just an example of how the session data can be used - to track how many instances of indexing is left)</span>
<a name="l00090"></a>00090                 $session_data['step']++;
<a name="l00091"></a>00091 
<a name="l00092"></a>00092 
<a name="l00093"></a>00093                 <span class="keywordflow">switch</span>((<span class="keywordtype">int</span>)$session_data['step'])      {
<a name="l00094"></a>00094                          <span class="keywordflow">case</span> 1:        <span class="comment">// Indexing Example: Content accessed with GET parameters added to URL:</span>
<a name="l00095"></a>00095 
<a name="l00096"></a>00096                                         <span class="comment">// Load indexer if not yet [DON'T CHANGE]:</span>
<a name="l00097"></a>00097                                 $pObj-&gt;loadIndexerClass();
<a name="l00098"></a>00098 
<a name="l00099"></a>00099                                         <span class="comment">// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:</span>
<a name="l00100"></a>00100                                 $rl = $pObj-&gt;getUidRootLineForClosestTemplate($cfgRec['pid']);
<a name="l00101"></a>00101 
<a name="l00102"></a>00102                                         <span class="comment">// Set up language uid, if any:</span>
<a name="l00103"></a>00103                                 $sys_language_uid = 0;
<a name="l00104"></a>00104 
<a name="l00105"></a>00105                                         <span class="comment">// Set up 2 example items to index:</span>
<a name="l00106"></a>00106                                 $exampleItems = array(
<a name="l00107"></a>00107                                         array(
<a name="l00108"></a>00108                                                 'ID' =&gt; '123',
<a name="l00109"></a>00109                                                 'title' =&gt; 'Title of Example 1',
<a name="l00110"></a>00110                                                 'content' =&gt; 'Vestibulum leo turpis, fringilla sit amet, semper eget, vestibulum ut, arcu. Vestibulum mauris orci, vulputate quis, congue eget, nonummy'
<a name="l00111"></a>00111                                         ),
<a name="l00112"></a>00112                                         array(
<a name="l00113"></a>00113                                                 'ID' =&gt; 'example2',
<a name="l00114"></a>00114                                                 'title' =&gt; 'Title of Example 2',
<a name="l00115"></a>00115                                                 'content' =&gt; 'Cras tortor turpis, vulputate non, accumsan a, pretium in, magna. Cras turpis turpis, pretium pulvinar, pretium vel, nonummy eu.'
<a name="l00116"></a>00116                                         )
<a name="l00117"></a>00117                                 );
<a name="l00118"></a>00118 
<a name="l00119"></a>00119                                         <span class="comment">// For each item, index it (this is what you might like to do in batches of like 100 items if all your content spans thousands of items!)</span>
<a name="l00120"></a>00120                                 foreach($exampleItems as $item) {
<a name="l00121"></a>00121 
<a name="l00122"></a>00122                                                 <span class="comment">// Prepare the GET variables array that must be added to the page URL in order to view result:</span>
<a name="l00123"></a>00123                                         parse_str('&amp;itemID='.rawurlencode($item['ID']), $GETparams);
<a name="l00124"></a>00124 
<a name="l00125"></a>00125                                                 <span class="comment">// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:</span>
<a name="l00126"></a>00126                                         $indexerObj = &amp;<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>');
<a name="l00127"></a>00127                                         $indexerObj-&gt;backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl, $GETparams, FALSE);
<a name="l00128"></a>00128                                         $indexerObj-&gt;backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
<a name="l00129"></a>00129                                         $indexerObj-&gt;forceIndexing = TRUE;
<a name="l00130"></a>00130 
<a name="l00131"></a>00131                                                 <span class="comment">// Indexing the content of the item (see tx_indexedsearch_indexer::backend_indexAsTYPO3Page() for options)</span>
<a name="l00132"></a>00132                                         $indexerObj-&gt;backend_indexAsTYPO3Page(
<a name="l00133"></a>00133                                                 $item['title'],
<a name="l00134"></a>00134                                                 '',
<a name="l00135"></a>00135                                                 '',
<a name="l00136"></a>00136                                                 $item['content'],
<a name="l00137"></a>00137                                                 $GLOBALS['LANG']-&gt;charSet,      <span class="comment">// Charset of content - MUST be set.</span>
<a name="l00138"></a>00138                                                 $item['tstamp'],                        <span class="comment">// Last-modified date</span>
<a name="l00139"></a>00139                                                 $item['create_date'],           <span class="comment">// Created date</span>
<a name="l00140"></a>00140                                                 $item['ID']
<a name="l00141"></a>00141                                         );
<a name="l00142"></a>00142                                 }
<a name="l00143"></a>00143                          <span class="keywordflow">break</span>;
<a name="l00144"></a>00144                          <span class="keywordflow">case</span> 2: <span class="comment">// Indexing Example: Content accessed directly in file system:</span>
<a name="l00145"></a>00145 
<a name="l00146"></a>00146                                         <span class="comment">// Load indexer if not yet [DON'T CHANGE]:</span>
<a name="l00147"></a>00147                                 $pObj-&gt;loadIndexerClass();
<a name="l00148"></a>00148 
<a name="l00149"></a>00149                                         <span class="comment">// Get rootline from the Indexing Record (needed because the indexer relates all search results to a position in the page tree!) [DON'T CHANGE]:</span>
<a name="l00150"></a>00150                                 $rl = $pObj-&gt;getUidRootLineForClosestTemplate($cfgRec['pid']);
<a name="l00151"></a>00151 
<a name="l00152"></a>00152                                         <span class="comment">// Set up language uid, if any:</span>
<a name="l00153"></a>00153                                 $sys_language_uid = 0;
<a name="l00154"></a>00154 
<a name="l00155"></a>00155                                         <span class="comment">// Prepare indexer (make instance, initialize it, set special features for indexing parameterized content - probably none of this should be changed by you) [DON'T CHANGE]:</span>
<a name="l00156"></a>00156                                 $indexerObj = &amp;<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>');
<a name="l00157"></a>00157                                 $indexerObj-&gt;backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
<a name="l00158"></a>00158                                 $indexerObj-&gt;backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
<a name="l00159"></a>00159                                 $indexerObj-&gt;hash['phash'] = -1;        <span class="comment">// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)</span>
<a name="l00160"></a>00160 
<a name="l00161"></a>00161                                         <span class="comment">// Index document:</span>
<a name="l00162"></a>00162                                 $indexerObj-&gt;indexRegularDocument('fileadmin/templates/index.html', TRUE);
<a name="l00163"></a>00163                          <span class="keywordflow">break</span>;
<a name="l00164"></a>00164                          <span class="keywordflow">case</span> 3: <span class="comment">// Indexing Example: Content accessed on External URLs:</span>
<a name="l00165"></a>00165 
<a name="l00166"></a>00166                                         <span class="comment">// Load indexer if not yet.</span>
<a name="l00167"></a>00167                                 $pObj-&gt;loadIndexerClass();
<a name="l00168"></a>00168 
<a name="l00169"></a>00169                                         <span class="comment">// Index external URL:</span>
<a name="l00170"></a>00170                                 $indexerObj = &amp;<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a>');
<a name="l00171"></a>00171                                 $indexerObj-&gt;backend_initIndexer($cfgRec['pid'], 0, $sys_language_uid, '', $rl);
<a name="l00172"></a>00172                                 $indexerObj-&gt;backend_setFreeIndexUid($cfgRec['uid'], $cfgRec['set_id']);
<a name="l00173"></a>00173                                 $indexerObj-&gt;hash['phash'] = -1;        <span class="comment">// To avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)</span>
<a name="l00174"></a>00174 
<a name="l00175"></a>00175                                         <span class="comment">// Index external URL (HTML only):</span>
<a name="l00176"></a>00176                                 $indexerObj-&gt;indexExternalUrl('http://www.google.com/');
<a name="l00177"></a>00177                          <span class="keywordflow">break</span>;
<a name="l00178"></a>00178                 }
<a name="l00179"></a>00179 
<a name="l00180"></a>00180                         <span class="comment">// Finally, set entry for next indexing instance (if all steps are not completed)</span>
<a name="l00181"></a>00181                 <span class="keywordflow">if</span> ($session_data['step']&lt;=3)   {
<a name="l00182"></a>00182                         $title = 'Step #'.$session_data['step'].' of 3';        <span class="comment">// Just information field. Never mind that the field is called "url" - this is what will be shown in the "crawler" log. Could be a URL - or whatever else tells what that indexing instance will do.</span>
<a name="l00183"></a>00183                         $pObj-&gt;addQueueEntryForHook($cfgRec, $title);
<a name="l00184"></a>00184                 }
<a name="l00185"></a>00185         }
<a name="l00186"></a>00186 }
<a name="l00187"></a>00187 
<a name="l00188"></a>00188 
<a name="l00189"></a>00189 <span class="keywordflow">if</span> (defined('TYPO3_MODE') &amp;&amp; $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php'])        {
<a name="l00190"></a>00190         include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/example/class.crawlerhook.php']);
<a name="l00191"></a>00191 }
<a name="l00192"></a>00192 ?&gt;
</pre></div><?php
  // Close the source listing above, then emit the page footer.
  // include_once does not load the helper file again if it is already loaded.
  include_once('../doc-typo3-funcs.php');

  get_footer();
?>