<?php
  // Pull in the documentation site's helper functions
  // (presumably where get_header() is defined -- verify against doc-typo3-funcs.php).
  include_once '../doc-typo3-funcs.php';

  // Page metadata written by Doxygen for this source listing; handed to get_header() below.
  $doxygen_vars = array(
    "title"          => "TYPO3 4.0.1: typo3_src-4.0.1/typo3/sysext/indexed_search/class.indexer.php Source File",
    "datetime"       => "Sat Dec 2 19:22:32 2006",
    "date"           => "2 Dec 2006",
    "doxygenversion" => "1.4.6",
    "projectname"    => "TYPO3 4.0.1",
    "projectnumber"  => "4.0.1"
  );

  // Hand the metadata to the header helper before the static HTML body that follows.
  get_header($doxygen_vars);
?>
<!-- Generated by Doxygen 1.4.6 -->
<div class="tabs">
  <ul>
    <li><a href="main.html"><span>Main&nbsp;Page</span></a></li>
    <li><a href="namespaces.html"><span>Namespaces</span></a></li>
    <li><a href="classes.html"><span>Classes</span></a></li>
    <li id="current"><a href="files.html"><span>Files</span></a></li>
    <li><a href="dirs.html"><span>Directories</span></a></li>
    <li><a href="pages.html"><span>Related&nbsp;Pages</span></a></li>
    <li><a href="examples.html"><span>Examples</span></a></li>
    <li>
      <form action="search.php" method="get">
        <table cellspacing="0" cellpadding="0" border="0">
          <tr>
            <td><label>&nbsp;<u>S</u>earch&nbsp;for&nbsp;</label></td>
            <td><input type="text" name="query" value="" size="20" accesskey="s"/></td>
          </tr>
        </table>
      </form>
    </li>
  </ul></div>
<div class="nav">
<a class="el" href="dir_c8daf1ad746050abf985cc546c89e248.html">typo3_src-4.0.1</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_18071ae4545d8b3e0364d30c0659c74a.html">typo3</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_57bf1ed8249c1fd5b014486d01bcb27a.html">sysext</a>&nbsp;&raquo;&nbsp;<a class="el" href="dir_1144f7dd65e866e7cd4aa66020137172.html">indexed_search</a></div>
<h1>class.indexer.php</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 &lt;?php
<a name="l00002"></a>00002 <span class="comment">/***************************************************************</span>
<a name="l00003"></a>00003 <span class="comment">*  Copyright notice</span>
<a name="l00004"></a>00004 <span class="comment">*</span>
<a name="l00005"></a>00005 <span class="comment">*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)</span>
<a name="l00006"></a>00006 <span class="comment">*  All rights reserved</span>
<a name="l00007"></a>00007 <span class="comment">*</span>
<a name="l00008"></a>00008 <span class="comment">*  This script is part of the TYPO3 project. The TYPO3 project is</span>
<a name="l00009"></a>00009 <span class="comment">*  free software; you can redistribute it and/or modify</span>
<a name="l00010"></a>00010 <span class="comment">*  it under the terms of the GNU General Public License as published by</span>
<a name="l00011"></a>00011 <span class="comment">*  the Free Software Foundation; either version 2 of the License, or</span>
<a name="l00012"></a>00012 <span class="comment">*  (at your option) any later version.</span>
<a name="l00013"></a>00013 <span class="comment">*</span>
<a name="l00014"></a>00014 <span class="comment">*  The GNU General Public License can be found at</span>
<a name="l00015"></a>00015 <span class="comment">*  http://www.gnu.org/copyleft/gpl.html.</span>
<a name="l00016"></a>00016 <span class="comment">*  A copy is found in the textfile GPL.txt and important notices to the license</span>
<a name="l00017"></a>00017 <span class="comment">*  from the author is found in LICENSE.txt distributed with these scripts.</span>
<a name="l00018"></a>00018 <span class="comment">*</span>
<a name="l00019"></a>00019 <span class="comment">*</span>
<a name="l00020"></a>00020 <span class="comment">*  This script is distributed in the hope that it will be useful,</span>
<a name="l00021"></a>00021 <span class="comment">*  but WITHOUT ANY WARRANTY; without even the implied warranty of</span>
<a name="l00022"></a>00022 <span class="comment">*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the</span>
<a name="l00023"></a>00023 <span class="comment">*  GNU General Public License for more details.</span>
<a name="l00024"></a>00024 <span class="comment">*</span>
<a name="l00025"></a>00025 <span class="comment">*  This copyright notice MUST APPEAR in all copies of the script!</span>
<a name="l00026"></a>00026 <span class="comment">***************************************************************/</span>
<a name="l00131"></a>00131 require_once(PATH_t3lib.'<span class="keyword">class</span>.t3lib_parsehtml.php');
<a name="l00132"></a>00132 
<a name="l00133"></a>00133 
<a name="l00141"></a><a class="code" href="classtx__indexedsearch__indexer.html">00141</a> <span class="keyword">class </span><a class="code" href="classtx__indexedsearch__indexer.html">tx_indexedsearch_indexer</a> {
<a name="l00142"></a>00142 
<a name="l00143"></a>00143                 <span class="comment">// Messages:</span>
<a name="l00144"></a><a class="code" href="classtx__indexedsearch__indexer.html#b8bd074bbc24f6abf40a2d1f569a5a19">00144</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#b8bd074bbc24f6abf40a2d1f569a5a19">$reasons</a> = array(
<a name="l00145"></a>00145                 -1 =&gt; 'mtime matched the document, so no changes detected and no content updated',
<a name="l00146"></a>00146                 -2 =&gt; 'The minimum age was not exceeded',
<a name="l00147"></a>00147                 1 =&gt; <span class="stringliteral">"The configured max-age was exceeded for the document and thus it's indexed."</span>,
<a name="l00148"></a>00148                 2 =&gt; 'The minimum age was exceed and mtime was set and the mtime was different, so the page was indexed.',
<a name="l00149"></a>00149                 3 =&gt; 'The minimum age was exceed, but mtime was not set, so the page was indexed.',
<a name="l00150"></a>00150                 4 =&gt; 'Page has never been indexed (is not represented in the index_phash table).'
<a name="l00151"></a>00151         );
<a name="l00152"></a>00152 
<a name="l00153"></a>00153                 <span class="comment">// HTML code blocks to exclude from indexing:</span>
<a name="l00154"></a><a class="code" href="classtx__indexedsearch__indexer.html#9e8f829e9198ad9be46d4bc5b409a141">00154</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#9e8f829e9198ad9be46d4bc5b409a141">$excludeSections</a> = 'script,style';
<a name="l00155"></a>00155 
<a name="l00156"></a>00156                 <span class="comment">// Supported Extensions for external files:</span>
<a name="l00157"></a><a class="code" href="classtx__indexedsearch__indexer.html#3194c5f21957c79a4140841bea300d18">00157</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#3194c5f21957c79a4140841bea300d18">$external_parsers</a> = array();                <span class="comment">// External parser objects, keys are file extension names. Values are objects with certain methods.</span>
<a name="l00158"></a>00158 
<a name="l00159"></a>00159                 <span class="comment">// Fe-group list (pages might be indexed separately for each usergroup combination to support search in access limited pages!)</span>
<a name="l00160"></a><a class="code" href="classtx__indexedsearch__indexer.html#b4b367090b361bf98a2094bddbe823da">00160</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#b4b367090b361bf98a2094bddbe823da">$defaultGrList</a> = '0,-1';
<a name="l00161"></a>00161 
<a name="l00162"></a>00162                 <span class="comment">// Min/Max times:</span>
<a name="l00163"></a><a class="code" href="classtx__indexedsearch__indexer.html#5ddd8f8183ae7f47432184015caab36e">00163</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#5ddd8f8183ae7f47432184015caab36e">$tstamp_maxAge</a> = 0;         <span class="comment">// If set, this tells a number of seconds that is the maximum age of an indexed document. Regardless of mtime the document will be re-indexed if this limit is exceeded.</span>
<a name="l00164"></a><a class="code" href="classtx__indexedsearch__indexer.html#85f5625dbd642fa2c4175f1beaf752f7">00164</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#85f5625dbd642fa2c4175f1beaf752f7">$tstamp_minAge</a> = 0;         <span class="comment">// If set, this tells a minimum limit before a document can be indexed again. This is regardless of mtime.</span>
<a name="l00165"></a><a class="code" href="classtx__indexedsearch__indexer.html#0af53f392e093a5a415d8c76ed7febb2">00165</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#0af53f392e093a5a415d8c76ed7febb2">$maxExternalFiles</a> = 0;      <span class="comment">// Max number of external files to index.</span>
<a name="l00166"></a>00166 
<a name="l00167"></a><a class="code" href="classtx__indexedsearch__indexer.html#79c49528df75c1a7ec0d644e6003506d">00167</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#79c49528df75c1a7ec0d644e6003506d">$forceIndexing</a> = FALSE;             <span class="comment">// If true, indexing is forced despite of hashes etc.</span>
<a name="l00168"></a><a class="code" href="classtx__indexedsearch__indexer.html#88b8b7a8c871108868aa2724e9dd9369">00168</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#88b8b7a8c871108868aa2724e9dd9369">$crawlerActive</a> = FALSE;             <span class="comment">// Set when crawler is detected (internal)</span>
<a name="l00169"></a>00169 
<a name="l00170"></a>00170                 <span class="comment">// INTERNALS:</span>
<a name="l00171"></a><a class="code" href="classtx__indexedsearch__indexer.html#e2ad275507de8fe536d64b7120a80f5d">00171</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#e2ad275507de8fe536d64b7120a80f5d">$defaultContentArray</a>=array(
<a name="l00172"></a>00172                 'title' =&gt; '',
<a name="l00173"></a>00173                 'description' =&gt; '',
<a name="l00174"></a>00174                 'keywords' =&gt; '',
<a name="l00175"></a>00175                 'body' =&gt; '',
<a name="l00176"></a>00176         );
<a name="l00177"></a><a class="code" href="classtx__indexedsearch__indexer.html#0a5a023d9194e95fe8b16beff456d8cb">00177</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#0a5a023d9194e95fe8b16beff456d8cb">$wordcount</a> = 0;
<a name="l00178"></a><a class="code" href="classtx__indexedsearch__indexer.html#27a18e53ef547bebe69b931d0663eed8">00178</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#27a18e53ef547bebe69b931d0663eed8">$externalFileCounter</a> = 0;
<a name="l00179"></a>00179 
<a name="l00180"></a><a class="code" href="classtx__indexedsearch__indexer.html#3e934b748ba3f8ebcfbb928c64e637bd">00180</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#3e934b748ba3f8ebcfbb928c64e637bd">$conf</a> = array();            <span class="comment">// Configuration set internally (see init functions for required keys and their meaning)</span>
<a name="l00181"></a><a class="code" href="classtx__indexedsearch__indexer.html#7a87bbaa1b2b0ca40ad3cc24470bfac3">00181</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#7a87bbaa1b2b0ca40ad3cc24470bfac3">$indexerConfig</a> = array();   <span class="comment">// Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']</span>
<a name="l00182"></a><a class="code" href="classtx__indexedsearch__indexer.html#33ffce025eb15e9436d430ba743ff2b5">00182</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#33ffce025eb15e9436d430ba743ff2b5">$hash</a> = array();            <span class="comment">// Hash array, contains phash and phash_grouping</span>
<a name="l00183"></a><a class="code" href="classtx__indexedsearch__indexer.html#944406ecae5d8046ac68bf148cac5f64">00183</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#944406ecae5d8046ac68bf148cac5f64">$file_phash_arr</a> = array();  <span class="comment">// Hash array for files</span>
<a name="l00184"></a><a class="code" href="classtx__indexedsearch__indexer.html#51583f60aeea0e3b916acd2ae0f1f1a0">00184</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#51583f60aeea0e3b916acd2ae0f1f1a0">$contentParts</a> = array();    <span class="comment">// Content of TYPO3 page</span>
<a name="l00185"></a><a class="code" href="classtx__indexedsearch__indexer.html#07e73367870b25dee473cb7e91836d85">00185</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#07e73367870b25dee473cb7e91836d85">$content_md5h</a> = '';
<a name="l00186"></a><a class="code" href="classtx__indexedsearch__indexer.html#88f955b13031e7368015c7aac53f5ea1">00186</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#88f955b13031e7368015c7aac53f5ea1">$internal_log</a> = array();    <span class="comment">// Internal log</span>
<a name="l00187"></a><a class="code" href="classtx__indexedsearch__indexer.html#e1391ed6621508db761a1335428f67f2">00187</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#e1391ed6621508db761a1335428f67f2">$indexExternalUrl_content</a> = '';
<a name="l00188"></a>00188 
<a name="l00189"></a><a class="code" href="classtx__indexedsearch__indexer.html#9482daf7da9d61436f93f11c6c335e23">00189</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#9482daf7da9d61436f93f11c6c335e23">$cHashParams</a> = array();     <span class="comment">// cHashparams array</span>
<a name="l00190"></a>00190 
<a name="l00191"></a><a class="code" href="classtx__indexedsearch__indexer.html#04330e818b4f71feebb6bfb6aa8d836c">00191</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#04330e818b4f71feebb6bfb6aa8d836c">$freqRange</a> = 32000;
<a name="l00192"></a><a class="code" href="classtx__indexedsearch__indexer.html#24c2ccbcb8e8d8ea5a908242da0d2230">00192</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#24c2ccbcb8e8d8ea5a908242da0d2230">$freqMax</a> = 0.1;
<a name="l00193"></a>00193 
<a name="l00194"></a>00194                 <span class="comment">// Objects:</span>
<a name="l00195"></a><a class="code" href="classtx__indexedsearch__indexer.html#dd1a88ec01b9548a8f14f4043c611459">00195</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#dd1a88ec01b9548a8f14f4043c611459">$csObj</a>;                             <span class="comment">// Charset class object , t3lib_cs</span>
<a name="l00196"></a><a class="code" href="classtx__indexedsearch__indexer.html#86ddbf1c45fd6c9b4c81b0002f3138d8">00196</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#86ddbf1c45fd6c9b4c81b0002f3138d8">$metaphoneObj</a>;              <span class="comment">// Metaphone object, if any</span>
<a name="l00197"></a><a class="code" href="classtx__indexedsearch__indexer.html#6123b0d1465284f1266223441b1203b0">00197</a>         var <a class="code" href="classtx__indexedsearch__indexer.html#6123b0d1465284f1266223441b1203b0">$lexerObj</a>;                  <span class="comment">// Lexer object for word splitting</span>
<a name="l00198"></a>00198 
<a name="l00199"></a>00199 
<a name="l00200"></a>00200 
<a name="l00207"></a><a class="code" href="classtx__indexedsearch__indexer.html#9784325d02a46d7ba818296a9a5d71d2">00207</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#9784325d02a46d7ba818296a9a5d71d2">hook_indexContent</a>(&amp;$pObj)      {
<a name="l00208"></a>00208 
<a name="l00209"></a>00209                         <span class="comment">// Indexer configuration from Extension Manager interface:</span>
<a name="l00210"></a>00210                 <a class="code" href="classtx__indexedsearch__indexer.html#7a87bbaa1b2b0ca40ad3cc24470bfac3">$indexerConfig</a> = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
<a name="l00211"></a>00211 
<a name="l00212"></a>00212                         <span class="comment">// Crawler activation:</span>
<a name="l00213"></a>00213                         <span class="comment">// Requirements are that the crawler is loaded, a crawler session is running and re-indexing requested as processing instruction:</span>
<a name="l00214"></a>00214                 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__extMgm.html#297116a3b1e17045ff193f170b8c4a29">t3lib_extMgm::isLoaded</a>('crawler')
<a name="l00215"></a>00215                                 &amp;&amp; $pObj-&gt;applicationData['tx_crawler']['running']
<a name="l00216"></a>00216                                 &amp;&amp; in_array('tx_indexedsearch_reindex', $pObj-&gt;applicationData['tx_crawler']['parameters']['procInstructions']))        {
<a name="l00217"></a>00217 
<a name="l00218"></a>00218                                 <span class="comment">// Setting simple log message:</span>
<a name="l00219"></a>00219                         $pObj-&gt;applicationData['tx_crawler']['log'][] = 'Forced Re-indexing enabled';
<a name="l00220"></a>00220 
<a name="l00221"></a>00221                                 <span class="comment">// Setting variables:</span>
<a name="l00222"></a>00222                         $this-&gt;crawlerActive = TRUE;    <span class="comment">// Crawler active flag</span>
<a name="l00223"></a>00223                         $this-&gt;forceIndexing = TRUE;    <span class="comment">// Force indexing despite timestamps etc.</span>
<a name="l00224"></a>00224                 }
<a name="l00225"></a>00225 
<a name="l00226"></a>00226                         <span class="comment">// Determine if page should be indexed, and if so, configure and initialize indexer</span>
<a name="l00227"></a>00227                 <span class="keywordflow">if</span> ($pObj-&gt;config['config']['index_enable'])    {
<a name="l00228"></a>00228                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Index page<span class="charliteral">','</span>');
<a name="l00229"></a>00229 
<a name="l00230"></a>00230                         <span class="keywordflow">if</span> (!<a class="code" href="classtx__indexedsearch__indexer.html#7a87bbaa1b2b0ca40ad3cc24470bfac3">$indexerConfig</a>['disableFrontendIndexing'] || $this-&gt;crawlerActive) {
<a name="l00231"></a>00231                                 <span class="keywordflow">if</span> (!$pObj-&gt;page['no_search'])  {
<a name="l00232"></a>00232                                         <span class="keywordflow">if</span> (!$pObj-&gt;no_cache)   {
<a name="l00233"></a>00233                                                 <span class="keywordflow">if</span> (!strcmp($pObj-&gt;sys_language_uid,$pObj-&gt;sys_language_content))       {
<a name="l00234"></a>00234 
<a name="l00235"></a>00235                                                                 <span class="comment">// Setting up internal configuration from config array:</span>
<a name="l00236"></a>00236                                                         $this-&gt;conf = array();
<a name="l00237"></a>00237 
<a name="l00238"></a>00238                                                                 <span class="comment">// Information about page for which the indexing takes place</span>
<a name="l00239"></a>00239                                                         $this-&gt;conf['<span class="keywordtype">id</span>'] = $pObj-&gt;id;                          <span class="comment">// Page id</span>
<a name="l00240"></a>00240                                                         $this-&gt;conf['type'] = $pObj-&gt;type;                      <span class="comment">// Page type</span>
<a name="l00241"></a>00241                                                         $this-&gt;conf['sys_language_uid'] = $pObj-&gt;sys_language_uid;      <span class="comment">// sys_language UID of the language of the indexing.</span>
<a name="l00242"></a>00242                                                         $this-&gt;conf['MP'] = $pObj-&gt;MP;                          <span class="comment">// MP variable, if any (Mount Points)</span>
<a name="l00243"></a>00243                                                         $this-&gt;conf['gr_list'] = $pObj-&gt;gr_list;        <span class="comment">// Group list</span>
<a name="l00244"></a>00244 
<a name="l00245"></a>00245                                                         $this-&gt;conf['cHash'] = $pObj-&gt;cHash;                                    <span class="comment">// cHash string for additional parameters</span>
<a name="l00246"></a>00246                                                         $this-&gt;conf['cHash_array'] = $pObj-&gt;cHash_array;                <span class="comment">// Array of the additional parameters</span>
<a name="l00247"></a>00247 
<a name="l00248"></a>00248                                                         $this-&gt;conf['crdate'] = $pObj-&gt;page['crdate'];                  <span class="comment">// The creation date of the TYPO3 page</span>
<a name="l00249"></a>00249                                                         $this-&gt;conf['page_cache_reg1'] = $pObj-&gt;page_cache_reg1;        <span class="comment">// reg1 of the caching table. Not known what practical use this has.</span>
<a name="l00250"></a>00250 
<a name="l00251"></a>00251                                                                 <span class="comment">// Root line uids</span>
<a name="l00252"></a>00252                                                         $this-&gt;conf['rootline_uids'] = array();
<a name="l00253"></a>00253                                                         foreach($pObj-&gt;config['rootLine'] as $rlkey =&gt; $rldat)  {
<a name="l00254"></a>00254                                                                 $this-&gt;conf['rootline_uids'][$rlkey] = $rldat['uid'];
<a name="l00255"></a>00255                                                         }
<a name="l00256"></a>00256 
<a name="l00257"></a>00257                                                                 <span class="comment">// Content of page:</span>
<a name="l00258"></a>00258                                                         $this-&gt;conf['content'] = $pObj-&gt;content;                                        <span class="comment">// Content string (HTML of TYPO3 page)</span>
<a name="l00259"></a>00259                                                         $this-&gt;conf['indexedDocTitle'] = $pObj-&gt;convOutputCharset($pObj-&gt;indexedDocTitle);      <span class="comment">// Alternative title for indexing</span>
<a name="l00260"></a>00260                                                         $this-&gt;conf['metaCharset'] = $pObj-&gt;metaCharset;                        <span class="comment">// Character set of content (will be converted to utf-8 during indexing)</span>
<a name="l00261"></a>00261                                                         $this-&gt;conf['mtime'] = $pObj-&gt;register['SYS_LASTCHANGED'];      <span class="comment">// Most recent modification time (seconds) of the content on the page. Used to evaluate whether it should be re-indexed.</span>
<a name="l00262"></a>00262 
<a name="l00263"></a>00263                                                                 <span class="comment">// Configuration of behavior:</span>
<a name="l00264"></a>00264                                                         $this-&gt;conf['index_externals'] = $pObj-&gt;config['config']['index_externals'];    <span class="comment">// Whether to index external documents like PDF, DOC etc. (if possible)</span>
<a name="l00265"></a>00265                                                         $this-&gt;conf['index_descrLgd'] = $pObj-&gt;config['config']['index_descrLgd'];              <span class="comment">// Length of description text (max 250, default 200)</span>
<a name="l00266"></a>00266 
<a name="l00267"></a>00267                                                                 <span class="comment">// Set to zero:</span>
<a name="l00268"></a>00268                                                         $this-&gt;conf['recordUid'] = 0;
<a name="l00269"></a>00269                                                         $this-&gt;conf['freeIndexUid'] = 0;
<a name="l00270"></a>00270                                                         $this-&gt;conf['freeIndexSetId'] = 0;
<a name="l00271"></a>00271 
<a name="l00272"></a>00272                                                                 <span class="comment">// Init and start indexing:</span>
<a name="l00273"></a>00273                                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#0230a60d469fe4b6e991a1b408630f14">init</a>();
<a name="l00274"></a>00274                                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#d6d6183ca33b7d424d0c73b776a77582">indexTypo3PageContent</a>();
<a name="l00275"></a>00275                                                 } <span class="keywordflow">else</span> $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Index page? No, -&gt;sys_language_uid was different from sys_language_content which indicates that the page contains fall-back content and that would be falsely indexed as localized content.');
<a name="l00276"></a>00276                                         } <span class="keywordflow">else</span> $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Index page? No, page was set to <span class="stringliteral">"no_cache"</span> and so cannot be indexed.');
<a name="l00277"></a>00277                                 } <span class="keywordflow">else</span> $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Index page? No, The <span class="stringliteral">"No Search"</span> flag has been set in the page properties!');
<a name="l00278"></a>00278                         } <span class="keywordflow">else</span> $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Index page? No, Ordinary Frontend indexing during rendering is disabled.');
<a name="l00279"></a>00279                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00280"></a>00280                 }
<a name="l00281"></a>00281         }
<a name="l00282"></a>00282 
<a name="l00283"></a>00283 
<a name="l00284"></a>00284 
<a name="l00285"></a>00285 
<a name="l00286"></a>00286 
<a name="l00287"></a>00287 
<a name="l00288"></a>00288 
<a name="l00289"></a>00289 
<a name="l00290"></a>00290         <span class="comment">/****************************</span>
<a name="l00291"></a>00291 <span class="comment">         *</span>
<a name="l00292"></a>00292 <span class="comment">         * Backend API</span>
<a name="l00293"></a>00293 <span class="comment">         *</span>
<a name="l00294"></a>00294 <span class="comment">         ****************************/</span>
<a name="l00295"></a>00295 
<a name="l00308"></a><a class="code" href="classtx__indexedsearch__indexer.html#552fc858205cb2c47a3ac04fcfa8ddda">00308</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#552fc858205cb2c47a3ac04fcfa8ddda">backend_initIndexer</a>($id, $type, $sys_language_uid, $MP, $uidRL, $cHash_array=array(), $createCHash=FALSE)      {
<a name="l00309"></a>00309 
<a name="l00310"></a>00310                         <span class="comment">// Setting up internal configuration from config array:</span>
<a name="l00311"></a>00311                 $this-&gt;conf = array();
<a name="l00312"></a>00312 
<a name="l00313"></a>00313                         <span class="comment">// Information about page for which the indexing takes place</span>
<a name="l00314"></a>00314                 $this-&gt;conf['<span class="keywordtype">id</span>'] = $id;                                <span class="comment">// Page id      (integer)</span>
<a name="l00315"></a>00315                 $this-&gt;conf['type'] = $type;                    <span class="comment">// Page type (integer)</span>
<a name="l00316"></a>00316                 $this-&gt;conf['sys_language_uid'] = $sys_language_uid;    <span class="comment">// sys_language UID of the language of the indexing (integer)</span>
<a name="l00317"></a>00317                 $this-&gt;conf['MP'] = $MP;                                <span class="comment">// MP variable, if any (Mount Points) (string)</span>
<a name="l00318"></a>00318                 $this-&gt;conf['gr_list'] = '0,-1';        <span class="comment">// Group list (hardcoded for now...)</span>
<a name="l00319"></a>00319 
<a name="l00320"></a>00320                         <span class="comment">// cHash values:</span>
<a name="l00321"></a>00321                 $this-&gt;conf['cHash'] = $createCHash ? $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#93fbc95c58d077c932a0857eb6106c15">makeCHash</a>($cHash_array) : '';      <span class="comment">// cHash string for additional parameters</span>
<a name="l00322"></a>00322                 $this-&gt;conf['cHash_array'] = $cHash_array;              <span class="comment">// Array of the additional parameters</span>
<a name="l00323"></a>00323 
<a name="l00324"></a>00324                         <span class="comment">// Set to defaults</span>
<a name="l00325"></a>00325                 $this-&gt;conf['freeIndexUid'] = 0;
<a name="l00326"></a>00326                 $this-&gt;conf['freeIndexSetId'] = 0;
<a name="l00327"></a>00327                 $this-&gt;conf['page_cache_reg1'] = '';
<a name="l00328"></a>00328 
<a name="l00329"></a>00329                         <span class="comment">// Root line uids</span>
<a name="l00330"></a>00330                 $this-&gt;conf['rootline_uids'] = $uidRL;
<a name="l00331"></a>00331 
<a name="l00332"></a>00332                         <span class="comment">// Configuration of behavior:</span>
<a name="l00333"></a>00333                 $this-&gt;conf['index_externals'] = 1;     <span class="comment">// Whether to index external documents like PDF, DOC etc. (if possible)</span>
<a name="l00334"></a>00334                 $this-&gt;conf['index_descrLgd'] = 200;            <span class="comment">// Length of description text (max 250, default 200)</span>
<a name="l00335"></a>00335 
<a name="l00336"></a>00336                         <span class="comment">// Init and start indexing:</span>
<a name="l00337"></a>00337                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#0230a60d469fe4b6e991a1b408630f14">init</a>();
<a name="l00338"></a>00338         }
<a name="l00339"></a>00339 
<a name="l00347"></a><a class="code" href="classtx__indexedsearch__indexer.html#e8164759d5c67b42b24430f72b0c2369">00347</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#e8164759d5c67b42b24430f72b0c2369">backend_setFreeIndexUid</a>($freeIndexUid, $freeIndexSetId=0)      {
<a name="l00348"></a>00348                 $this-&gt;conf['freeIndexUid'] = $freeIndexUid;
<a name="l00349"></a>00349                 $this-&gt;conf['freeIndexSetId'] = $freeIndexSetId;
<a name="l00350"></a>00350         }
<a name="l00351"></a>00351 
<a name="l00365"></a><a class="code" href="classtx__indexedsearch__indexer.html#14aee4379b46c4656fd2d791f6b67ca9">00365</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#14aee4379b46c4656fd2d791f6b67ca9">backend_indexAsTYPO3Page</a>($title, $keywords, $description, $content, $charset, $mtime, $crdate=0, $recordUid=0) {
<a name="l00366"></a>00366 
<a name="l00367"></a>00367                         <span class="comment">// Content of page:</span>
<a name="l00368"></a>00368                 $this-&gt;conf['mtime'] = $mtime;                  <span class="comment">// Most recent modification time (seconds) of the content</span>
<a name="l00369"></a>00369                 $this-&gt;conf['crdate'] = $crdate;                <span class="comment">// The creation date of the TYPO3 content</span>
<a name="l00370"></a>00370                 $this-&gt;conf['recordUid'] = $recordUid;  <span class="comment">// UID of the record, if applicable</span>
<a name="l00371"></a>00371 
<a name="l00372"></a>00372                         <span class="comment">// Construct fake HTML for parsing:</span>
<a name="l00373"></a>00373                 $this-&gt;conf['content'] = '
<a name="l00374"></a>00374                 &lt;html&gt;
<a name="l00375"></a>00375                         &lt;head&gt;
<a name="l00376"></a>00376                                 &lt;title&gt;'.htmlspecialchars($title).'&lt;/title&gt;
<a name="l00377"></a>00377                                 &lt;meta name=<span class="stringliteral">"keywords"</span> content=<span class="stringliteral">"'.htmlspecialchars($keywords).'"</span> /&gt;
<a name="l00378"></a>00378                                 &lt;meta name=<span class="stringliteral">"description"</span> content=<span class="stringliteral">"'.htmlspecialchars($description).'"</span> /&gt;
<a name="l00379"></a>00379                         &lt;/head&gt;
<a name="l00380"></a>00380                         &lt;body&gt;
<a name="l00381"></a>00381                                 '.htmlspecialchars($content).'
<a name="l00382"></a>00382                         &lt;/body&gt;
<a name="l00383"></a>00383                 &lt;/html&gt;';                                       <span class="comment">// Content string (HTML of TYPO3 page)</span>
<a name="l00384"></a>00384 
<a name="l00385"></a>00385                         <span class="comment">// Initializing charset:</span>
<a name="l00386"></a>00386                 $this-&gt;conf['metaCharset'] = $charset;                  <span class="comment">// Character set of content (will be converted to utf-8 during indexing)</span>
<a name="l00387"></a>00387                 $this-&gt;conf['indexedDocTitle'] = '';    <span class="comment">// Alternative title for indexing</span>
<a name="l00388"></a>00388 
<a name="l00389"></a>00389                         <span class="comment">// Index content as if it was a TYPO3 page:</span>
<a name="l00390"></a>00390                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#d6d6183ca33b7d424d0c73b776a77582">indexTypo3PageContent</a>();
<a name="l00391"></a>00391         }
<a name="l00392"></a>00392 
<a name="l00393"></a>00393 
<a name="l00394"></a>00394 
<a name="l00395"></a>00395 
<a name="l00396"></a>00396 
<a name="l00397"></a>00397 
<a name="l00398"></a>00398 
<a name="l00399"></a>00399 
<a name="l00400"></a>00400 
<a name="l00401"></a>00401 
<a name="l00402"></a>00402 
<a name="l00403"></a>00403 
<a name="l00404"></a>00404 
<a name="l00405"></a>00405         <span class="comment">/********************************</span>
<a name="l00406"></a>00406 <span class="comment">         *</span>
<a name="l00407"></a>00407 <span class="comment">         * Initialization</span>
<a name="l00408"></a>00408 <span class="comment">         *</span>
<a name="l00409"></a>00409 <span class="comment">         *******************************/</span>
<a name="l00410"></a>00410 
<a name="l00416"></a><a class="code" href="classtx__indexedsearch__indexer.html#0230a60d469fe4b6e991a1b408630f14">00416</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#0230a60d469fe4b6e991a1b408630f14">init</a>() {
<a name="l00417"></a>00417                 global $TYPO3_CONF_VARS;
<a name="l00418"></a>00418 
<a name="l00419"></a>00419                         <span class="comment">// Initializing:</span>
<a name="l00420"></a>00420                 $this-&gt;cHashParams = $this-&gt;conf['cHash_array'];
<a name="l00421"></a>00421                 <span class="keywordflow">if</span> (is_array($this-&gt;cHashParams) &amp;&amp; count($this-&gt;cHashParams))  {
<a name="l00422"></a>00422                         <span class="keywordflow">if</span> ($this-&gt;conf['cHash'])       $this-&gt;cHashParams['cHash'] = $this-&gt;conf['cHash'];     <span class="comment">// Add this so that URL's come out right...</span>
<a name="l00423"></a>00423                         unset($this-&gt;cHashParams['encryptionKey']);             <span class="comment">// encryptionKey is added inside TSFE in order to calculate the cHash value and it should NOT be a part of this array!!! If it is it will be exposed in links!!!</span>
<a name="l00424"></a>00424                 }
<a name="l00425"></a>00425 
<a name="l00426"></a>00426                         <span class="comment">// Setting phash / phash_grouping which identifies the indexed page based on some of these variables:</span>
<a name="l00427"></a>00427                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#ba29b8cb5f0860797374b291a863b673">setT3Hashes</a>();
<a name="l00428"></a>00428 
<a name="l00429"></a>00429                         <span class="comment">// Indexer configuration from Extension Manager interface:</span>
<a name="l00430"></a>00430                 $this-&gt;indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
<a name="l00431"></a>00431                 $this-&gt;tstamp_minAge = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($this-&gt;indexerConfig['minAge']*3600,0);
<a name="l00432"></a>00432                 $this-&gt;tstamp_maxAge = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($this-&gt;indexerConfig['maxAge']*3600,0);
<a name="l00433"></a>00433                 $this-&gt;maxExternalFiles = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($this-&gt;indexerConfig['maxExternalFiles'],0,1000,5);
<a name="l00434"></a>00434                 $this-&gt;flagBitMask = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($this-&gt;indexerConfig['flagBitMask'],0,255);
<a name="l00435"></a>00435 
<a name="l00436"></a>00436                         <span class="comment">// Initialize external document parsers:</span>
<a name="l00437"></a>00437                         <span class="comment">// Example configuration, see ext_localconf.php of this extension!</span>
<a name="l00438"></a>00438                 <span class="keywordflow">if</span> ($this-&gt;conf['index_externals'])     {
<a name="l00439"></a>00439                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#03fce626e53bdb3ccef7227147c8d525">initializeExternalParsers</a>();
<a name="l00440"></a>00440                 }
<a name="l00441"></a>00441 
<a name="l00442"></a>00442                         <span class="comment">// Initialize lexer (class that deconstructs the text into words):</span>
<a name="l00443"></a>00443                         <span class="comment">// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer'] = 'EXT:indexed_search/class.lexer.php:&amp;tx_indexedsearch_lexer';</span>
<a name="l00444"></a>00444                 $lexerObjRef = $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer'] ?
<a name="l00445"></a>00445                                                 $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['lexer'] :
<a name="l00446"></a>00446                                                 'EXT:indexed_search/<span class="keyword">class</span>.lexer.php:&amp;<a class="code" href="classtx__indexedsearch__lexer.html">tx_indexedsearch_lexer</a>';
<a name="l00447"></a>00447                 $this-&gt;lexerObj = &amp;<a class="code" href="classt3lib__div.html#ebd81edfc92886dc4c625fc9fe53595f">t3lib_div::getUserObj</a>($lexerObjRef);
<a name="l00448"></a>00448                 $this-&gt;lexerObj-&gt;debug = $this-&gt;indexerConfig['debugMode'];
<a name="l00449"></a>00449 
<a name="l00450"></a>00450                         <span class="comment">// Initialize metaphone hook:</span>
<a name="l00451"></a>00451                         <span class="comment">// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&amp;user_DoubleMetaPhone';</span>
<a name="l00452"></a>00452                 <span class="keywordflow">if</span> ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
<a name="l00453"></a>00453                         $this-&gt;metaphoneObj = &amp;<a class="code" href="classt3lib__div.html#ebd81edfc92886dc4c625fc9fe53595f">t3lib_div::getUserObj</a>($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
<a name="l00454"></a>00454                         $this-&gt;metaphoneObj-&gt;pObj = &amp;$this;
<a name="l00455"></a>00455                 }
<a name="l00456"></a>00456 
<a name="l00457"></a>00457                         <span class="comment">// Init charset class:</span>
<a name="l00458"></a>00458                 $this-&gt;csObj = &amp;<a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('<a class="code" href="classt3lib__cs.html">t3lib_cs</a>');
<a name="l00459"></a>00459         }
<a name="l00460"></a>00460 
<a name="l00468"></a><a class="code" href="classtx__indexedsearch__indexer.html#03fce626e53bdb3ccef7227147c8d525">00468</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#03fce626e53bdb3ccef7227147c8d525">initializeExternalParsers</a>()    {
<a name="l00469"></a>00469                 global $TYPO3_CONF_VARS;
<a name="l00470"></a>00470 
<a name="l00471"></a>00471                 <span class="keywordflow">if</span> (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))        {
<a name="l00472"></a>00472                         foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension =&gt; $_objRef)    {
<a name="l00473"></a>00473                                 $this-&gt;external_parsers[$extension] = &amp;<a class="code" href="classt3lib__div.html#ebd81edfc92886dc4c625fc9fe53595f">t3lib_div::getUserObj</a>($_objRef);
<a name="l00474"></a>00474                                 $this-&gt;external_parsers[$extension]-&gt;pObj = &amp;$this;
<a name="l00475"></a>00475 
<a name="l00476"></a>00476                                         <span class="comment">// Init parser and if it returns false, unset its entry again:</span>
<a name="l00477"></a>00477                                 <span class="keywordflow">if</span> (!$this-&gt;external_parsers[$extension]-&gt;initParser($extension))       {
<a name="l00478"></a>00478                                         unset($this-&gt;external_parsers[$extension]);
<a name="l00479"></a>00479                                 }
<a name="l00480"></a>00480                         }
<a name="l00481"></a>00481                 }
<a name="l00482"></a>00482         }
<a name="l00483"></a>00483 
<a name="l00484"></a>00484 
<a name="l00485"></a>00485 
<a name="l00486"></a>00486 
<a name="l00487"></a>00487 
<a name="l00488"></a>00488 
<a name="l00489"></a>00489 
<a name="l00490"></a>00490 
<a name="l00491"></a>00491 
<a name="l00492"></a>00492 
<a name="l00493"></a>00493 
<a name="l00494"></a>00494 
<a name="l00495"></a>00495 
<a name="l00496"></a>00496 
<a name="l00497"></a>00497 
<a name="l00498"></a>00498         <span class="comment">/********************************</span>
<a name="l00499"></a>00499 <span class="comment">         *</span>
<a name="l00500"></a>00500 <span class="comment">         * Indexing; TYPO3 pages (HTML content)</span>
<a name="l00501"></a>00501 <span class="comment">         *</span>
<a name="l00502"></a>00502 <span class="comment">         *******************************/</span>
<a name="l00503"></a>00503 
<a name="l00509"></a><a class="code" href="classtx__indexedsearch__indexer.html#d6d6183ca33b7d424d0c73b776a77582">00509</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#d6d6183ca33b7d424d0c73b776a77582">indexTypo3PageContent</a>()        {
<a name="l00510"></a>00510 
<a name="l00511"></a>00511                 $check = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#907f6e3af30a437ac65b03875082dbc3">checkMtimeTstamp</a>($this-&gt;conf['mtime'], $this-&gt;hash['phash']);
<a name="l00512"></a>00512                 $is_grlist = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c9f4be79a1cd07065d9310e938021bb6">is_grlist_set</a>($this-&gt;hash['phash']);
<a name="l00513"></a>00513 
<a name="l00514"></a>00514                 <span class="keywordflow">if</span> ($check &gt; 0 || !$is_grlist || $this-&gt;forceIndexing)  {
<a name="l00515"></a>00515 
<a name="l00516"></a>00516                                 <span class="comment">// Setting message:</span>
<a name="l00517"></a>00517                         <span class="keywordflow">if</span> ($this-&gt;forceIndexing)       {
<a name="l00518"></a>00518                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Indexing needed, reason: Forced',1);
<a name="l00519"></a>00519                         } elseif ($check &gt; 0)   {
<a name="l00520"></a>00520                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Indexing needed, reason: '.$this-&gt;reasons[$check],1);
<a name="l00521"></a>00521                         } <span class="keywordflow">else</span> {
<a name="l00522"></a>00522                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Indexing needed, reason: Updates gr_list!',1);
<a name="l00523"></a>00523                         }
<a name="l00524"></a>00524 
<a name="l00525"></a>00525                                         <span class="comment">// Divide into title,keywords,description and body:</span>
<a name="l00526"></a>00526                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Split content<span class="charliteral">','</span>');
<a name="l00527"></a>00527                                 $this-&gt;contentParts = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#2a02ab65766324d91d152c9d9177cc4e">splitHTMLContent</a>($this-&gt;conf['content']);
<a name="l00528"></a>00528                                 <span class="keywordflow">if</span> ($this-&gt;conf['indexedDocTitle'])     {
<a name="l00529"></a>00529                                         $this-&gt;contentParts['title'] = $this-&gt;conf['indexedDocTitle'];
<a name="l00530"></a>00530                                 }
<a name="l00531"></a>00531                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00532"></a>00532 
<a name="l00533"></a>00533                                 <span class="comment">// Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so don't!)</span>
<a name="l00534"></a>00534                         $this-&gt;content_md5h = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#7652caa06b5e1ed0aaa3ad337c852b9b">md5inthash</a>(implode($this-&gt;contentParts,''));
<a name="l00535"></a>00535 
<a name="l00536"></a>00536                                 <span class="comment">// This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash.</span>
<a name="l00537"></a>00537                                 <span class="comment">// If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more.</span>
<a name="l00538"></a>00538                                 <span class="comment">// This will also prevent pages from being indexed if a fe_user has logged in and it turns out that the page content is not changed anyway. fe_users logged in should always search with hash_gr_list = "0,-1" OR "[their_group_list]". This situation will be prevented only if the page has been indexed with no user logged in beforehand. Else the page will be indexed by users until that event. However that does not present a serious problem.</span>
<a name="l00539"></a>00539                         $checkCHash = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#63201744025e2e50dfde029c5a9c8bb8">checkContentHash</a>();
<a name="l00540"></a>00540                         <span class="keywordflow">if</span> (!is_array($checkCHash) || $check===1)       {
<a name="l00541"></a>00541                                 $Pstart=<a class="code" href="classt3lib__div.html#4351f76abf45f1fbae8dd0075744aa3c">t3lib_div::milliseconds</a>();
<a name="l00542"></a>00542 
<a name="l00543"></a>00543                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Converting charset of content ('.$this-&gt;conf['metaCharset'].') to utf-8<span class="charliteral">','</span>');
<a name="l00544"></a>00544                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#9149e698f38441a582ea2a01d6060e6b">charsetEntity2utf8</a>($this-&gt;contentParts,$this-&gt;conf['metaCharset']);
<a name="l00545"></a>00545                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00546"></a>00546 
<a name="l00547"></a>00547                                                 <span class="comment">// Splitting words</span>
<a name="l00548"></a>00548                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Extract words from content<span class="charliteral">','</span>');
<a name="l00549"></a>00549                                         $splitInWords = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#ef5baf9c2be6ef1b1051bc2cfdceb2a9">processWordsInArrays</a>($this-&gt;contentParts);
<a name="l00550"></a>00550                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00551"></a>00551 
<a name="l00552"></a>00552                                                 <span class="comment">// Analyse the indexed words.</span>
<a name="l00553"></a>00553                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Analyse the extracted words<span class="charliteral">','</span>');
<a name="l00554"></a>00554                                         $indexArr = $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#ecd591dc0b5a02aad38bd390982fdf6d">indexAnalyze</a>($splitInWords);
<a name="l00555"></a>00555                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00556"></a>00556 
<a name="l00557"></a>00557                                                 <span class="comment">// Submitting page (phash) record</span>
<a name="l00558"></a>00558                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Submitting page<span class="charliteral">','</span>');
<a name="l00559"></a>00559                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#73b916e9285d639204a3c1dd6ea2deba">submitPage</a>();
<a name="l00560"></a>00560                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00561"></a>00561 
<a name="l00562"></a>00562                                                 <span class="comment">// Check words and submit to word list if not there</span>
<a name="l00563"></a>00563                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Check word list and submit words<span class="charliteral">','</span>');
<a name="l00564"></a>00564                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#0583b9fa452dc814b6be43f5c21e5d80">checkWordList</a>($indexArr);
<a name="l00565"></a>00565                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c0faf81393d9b7ecdc7bdd1bf6987865">submitWords</a>($indexArr,$this-&gt;hash['phash']);
<a name="l00566"></a>00566                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00567"></a>00567 
<a name="l00568"></a>00568                                                 <span class="comment">// Set parsetime</span>
<a name="l00569"></a>00569                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#135006b58277d3b746e8beb0b405673d">updateParsetime</a>($this-&gt;hash['phash'],<a class="code" href="classt3lib__div.html#4351f76abf45f1fbae8dd0075744aa3c">t3lib_div::milliseconds</a>()-$Pstart);
<a name="l00570"></a>00570 
<a name="l00571"></a>00571                                                 <span class="comment">// Checking external files if configured for.</span>
<a name="l00572"></a>00572                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">log_push</a>('Checking external files<span class="charliteral">','</span>');
<a name="l00573"></a>00573                                 <span class="keywordflow">if</span> ($this-&gt;conf['index_externals'])     {
<a name="l00574"></a>00574                                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#502fb2ad0c77d9e2ba8c5af4be067f98">extractLinks</a>($this-&gt;conf['content']);
<a name="l00575"></a>00575                                 }
<a name="l00576"></a>00576                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">log_pull</a>();
<a name="l00577"></a>00577                         } <span class="keywordflow">else</span> {
<a name="l00578"></a>00578                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#ccf2c8b27a41013ae45c46a5d8991bb6">updateTstamp</a>($this-&gt;hash['phash'],$this-&gt;conf['mtime']); <span class="comment">// Update the timestamp</span>
<a name="l00579"></a>00579                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#e4c93125d3b2259a59116538c11ab573">updateSetId</a>($this-&gt;hash['phash']);
<a name="l00580"></a>00580                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#ffbff13f630a15533bc87dde2f37dd81">update_grlist</a>($checkCHash['phash'],$this-&gt;hash['phash']);        <span class="comment">// $checkCHash['phash'] is the phash of the result row that is similar to the current phash regarding the content hash.</span>
<a name="l00581"></a>00581                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#efcb7dac883028cce374d4e6fcacbad0">updateRootline</a>();
<a name="l00582"></a>00582                                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Indexing not needed, the contentHash, '.$this-&gt;content_md5h.', has not changed. Timestamp, grlist and rootline updated if necessary.');
<a name="l00583"></a>00583                         }
<a name="l00584"></a>00584                 } <span class="keywordflow">else</span> {
<a name="l00585"></a>00585                         $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">log_setTSlogMessage</a>('Indexing not needed, reason: '.$this-&gt;reasons[$check]);
<a name="l00586"></a>00586                 }
<a name="l00587"></a>00587         }
<a name="l00588"></a>00588 
<a name="l00596"></a><a class="code" href="classtx__indexedsearch__indexer.html#2a02ab65766324d91d152c9d9177cc4e">00596</a>         function <a class="code" href="classtx__indexedsearch__indexer.html#2a02ab65766324d91d152c9d9177cc4e">splitHTMLContent</a>($content) {
<a name="l00597"></a>00597 
<a name="l00598"></a>00598                         <span class="comment">// divide head from body ( u-ouh :) )</span>
<a name="l00599"></a>00599                 $contentArr = $this-&gt;defaultContentArray;
<a name="l00600"></a>00600                 $contentArr['body'] = stristr($content,'&lt;body');
<a name="l00601"></a>00601                 $headPart = substr($content,0,-strlen($contentArr['body']));
<a name="l00602"></a>00602 
<a name="l00603"></a>00603                         <span class="comment">// get title</span>
<a name="l00604"></a>00604                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#961e50cc6dd20bbcf94a8a4e70412b95">embracingTags</a>($headPart,'TITLE',$contentArr['title'],$dummy2,$dummy);
<a name="l00605"></a>00605                 $titleParts = explode(<span class="charliteral">':'</span>,$contentArr['title'],2);
<a name="l00606"></a>00606                 $contentArr['title'] = trim(isset($titleParts[1]) ? $titleParts[1] : $titleParts[0]);
<a name="l00607"></a>00607 
<a name="l00608"></a>00608                         <span class="comment">// get keywords and description metatags</span>
<a name="l00609"></a>00609                 <span class="keywordflow">for</span>($i=0;$this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#961e50cc6dd20bbcf94a8a4e70412b95">embracingTags</a>($headPart,'meta',$dummy,$headPart,$meta[$i]);$i++) { <span class="comment">/*nothing*/</span> }
<a name="l00610"></a>00610                 <span class="keywordflow">for</span>($i=0;isset($meta[$i]);$i++) {
<a name="l00611"></a>00611                         $meta[$i] = <a class="code" href="classt3lib__div.html#040fb56ea1d21aa12d1157da4164cdfc">t3lib_div::get_tag_attributes</a>($meta[$i]);
<a name="l00612"></a>00612                         <span class="keywordflow">if</span>(stristr($meta[$i]['name'],'keywords')) $contentArr['keywords'].=<span class="charliteral">','</span>.$meta[$i]['content'];
<a name="l00613"></a>00613                         <span class="keywordflow">if</span>(stristr($meta[$i]['name'],'description')) $contentArr['description'].=<span class="charliteral">','</span>.$meta[$i]['content'];
<a name="l00614"></a>00614                 }
<a name="l00615"></a>00615 
<a name="l00616"></a>00616                         <span class="comment">// Process &lt;!--TYPO3SEARCH_begin--&gt; or &lt;!--TYPO3SEARCH_end--&gt; tags:</span>
<a name="l00617"></a>00617                 $this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#dcafe071cc8d331f573bd995e976e394">typoSearchTags</a>($contentArr['body']);
<a name="l00618"></a>00618 
<a name="l00619"></a>00619                         <span class="comment">// Get rid of unwanted sections (ie. scripting and style stuff) in body</span>
<a name="l00620"></a>00620                 $tagList = explode(<span class="charliteral">','</span>,$this-&gt;excludeSections);
<a name="l00621"></a>00621                 foreach($tagList as $tag)       {
<a name="l00622"></a>00622                         <span class="keywordflow">while</span>($this-&gt;<a class="code" href="classtx__indexedsearch__indexer.html#961e50cc6dd20bbcf94a8a4e70412b95">embracingTags</a>($contentArr['body'],$tag,$dummy,$contentArr['body'],$dummy2));
<a name="l00623"></a>00623                 }
<a name="l00624"></a>00624 
<a name="l00625"></a>00625                         <span class="comment">// remove tags, but first make sure we don't concatenate words by doing it</span>
<a name="l00626"></a>00626                 $contentArr['body'] = str_replace(<span class="charliteral">'&lt;'</span>,' &lt;',$contentArr['body']);
<a name="l00627"></a>00627                 $contentArr['body'] = trim(strip_tags($contentArr['body']));
<a name="l00628"></a>00628 
<a name="l00629"></a>00629                 $contentArr['keywords'] = trim($contentArr['keywords']);
<a name="l00630"></a>00630                 $contentArr['description'] = trim($contentArr['description']);
<a name="l00631"></a>00631 
<a name="l00632"></a>00632                         <span class="comment">// Return array</span>
<a name="l00633"></a>00633                 <span class="keywordflow">return</span> $contentArr;
<a name="l00634"></a>00634         }
<a name="l00635"></a>00635 
<a name="l00642"></a><a class="code" href="classtx__indexedsearch__indexer.html#e18c6163e12f25c340c91bb1af970d39">00642</a>         function getHTMLcharset($content)       {
<a name="l00643"></a>00643                 <span class="keywordflow">if</span> (eregi('&lt;meta[[:space:]]+[^&gt;]*http-equiv[[:space:]]*=[[:space:]]*[<span class="stringliteral">"\']CONTENT-TYPE["</span>\'][^&gt;]*&gt;',$content,$reg))       {
<a name="l00644"></a>00644                         <span class="keywordflow">if</span> (eregi('charset[[:space:]]*=[[:space:]]*([[:alnum:]-]+)',$reg[0],$reg2))     {
<a name="l00645"></a>00645                                 <span class="keywordflow">return</span> $reg2[1];
<a name="l00646"></a>00646                         }
<a name="l00647"></a>00647                 }
<a name="l00648"></a>00648         }
<a name="l00649"></a>00649 
<a name="l00657"></a><a class="code" href="classtx__indexedsearch__indexer.html#6dbe55749a6b60d0cfb30dfee66d6b6b">00657</a>         function convertHTMLToUtf8($content,$charset='')        {
<a name="l00658"></a>00658 
<a name="l00659"></a>00659                         <span class="comment">// Find charset:</span>
<a name="l00660"></a>00660                 $charset = $charset ? $charset : $this-&gt;getHTMLcharset($content);
<a name="l00661"></a>00661                 $charset = $this-&gt;csObj-&gt;parse_charset($charset);
<a name="l00662"></a>00662 
<a name="l00663"></a>00663                         <span class="comment">// Convert charset:</span>
<a name="l00664"></a>00664                 <span class="keywordflow">if</span> ($charset &amp;&amp; $charset!=='utf-8')     {
<a name="l00665"></a>00665                         $content = $this-&gt;csObj-&gt;utf8_encode($content, $charset);
<a name="l00666"></a>00666                 }
<a name="l00667"></a>00667                         <span class="comment">// Convert entities, assuming document is now UTF-8:</span>
<a name="l00668"></a>00668                 $content = $this-&gt;csObj-&gt;entities_to_utf8($content, TRUE);
<a name="l00669"></a>00669 
<a name="l00670"></a>00670                 <span class="keywordflow">return</span> $content;
<a name="l00671"></a>00671         }
<a name="l00672"></a>00672 
<a name="l00685"></a><a class="code" href="classtx__indexedsearch__indexer.html#961e50cc6dd20bbcf94a8a4e70412b95">00685</a>         function embracingTags($string,$tagName,&amp;$tagContent,&amp;$stringAfter,&amp;$paramList) {
<a name="l00686"></a>00686                 $endTag = '&lt;/'.$tagName.<span class="charliteral">'&gt;'</span>;
<a name="l00687"></a>00687                 $startTag = <span class="charliteral">'&lt;'</span>.$tagName;
<a name="l00688"></a>00688 
<a name="l00689"></a>00689                 $isTagInText = stristr($string,$startTag);              <span class="comment">// stristr used because we want a case-insensitive search for the tag.</span>
<a name="l00690"></a>00690                 <span class="keywordflow">if</span>(!$isTagInText) <span class="keywordflow">return</span> <span class="keyword">false</span>; <span class="comment">// if the tag was not found, return false</span>
<a name="l00691"></a>00691 
<a name="l00692"></a>00692                 list($paramList,$isTagInText) = explode(<span class="charliteral">'&gt;'</span>,substr($isTagInText,strlen($startTag)),2);
<a name="l00693"></a>00693                 $afterTagInText = stristr($isTagInText,$endTag);
<a name="l00694"></a>00694                 <span class="keywordflow">if</span> ($afterTagInText)    {
<a name="l00695"></a>00695                         $stringBefore = substr($string, 0, strpos(strtolower($string), strtolower($startTag)));
<a name="l00696"></a>00696                         $tagContent = substr($isTagInText,0,strlen($isTagInText)-strlen($afterTagInText));
<a name="l00697"></a>00697                         $stringAfter = $stringBefore.substr($afterTagInText,strlen($endTag));
<a name="l00698"></a>00698                 } <span class="keywordflow">else</span> {        <span class="comment">// If there was no ending tag, the tagContent is blank and anything after the tag it self is returned.</span>
<a name="l00699"></a>00699                         $tagContent='';
<a name="l00700"></a>00700                         $stringAfter = $isTagInText;
<a name="l00701"></a>00701                 }
<a name="l00702"></a>00702 
<a name="l00703"></a>00703                 <span class="keywordflow">return</span> <span class="keyword">true</span>;
<a name="l00704"></a>00704         }
<a name="l00705"></a>00705 
<a name="l00712"></a><a class="code" href="classtx__indexedsearch__indexer.html#dcafe071cc8d331f573bd995e976e394">00712</a>         function typoSearchTags(&amp;$body) {
<a name="l00713"></a>00713                 $expBody = preg_split('/\&lt;\!\-\-[\s]?TYPO3SEARCH_/',$body);
<a name="l00714"></a>00714 
<a name="l00715"></a>00715                 <span class="keywordflow">if</span>(count($expBody)&gt;1) {
<a name="l00716"></a>00716                         $body = '';
<a name="l00717"></a>00717 
<a name="l00718"></a>00718                         foreach($expBody as $val)       {
<a name="l00719"></a>00719                                 $part = explode('--&gt;',$val,2);
<a name="l00720"></a>00720                                 <span class="keywordflow">if</span>(trim($part[0])=='begin') {
<a name="l00721"></a>00721                                         $body.= $part[1];
<a name="l00722"></a>00722                                         $prev = '';
<a name="l00723"></a>00723                                 } elseif(trim($part[0])=='end') {
<a name="l00724"></a>00724                                         $body.= $prev;
<a name="l00725"></a>00725                                 } <span class="keywordflow">else</span> {
<a name="l00726"></a>00726                                         $prev = $val;
<a name="l00727"></a>00727                                 }
<a name="l00728"></a>00728                         }
<a name="l00729"></a>00729                         <span class="keywordflow">return</span> <span class="keyword">true</span>;
<a name="l00730"></a>00730                 } <span class="keywordflow">else</span> {
<a name="l00731"></a>00731                         <span class="keywordflow">return</span> <span class="keyword">false</span>;
<a name="l00732"></a>00732                 }
<a name="l00733"></a>00733         }
<a name="l00734"></a>00734 
<a name="l00741"></a><a class="code" href="classtx__indexedsearch__indexer.html#502fb2ad0c77d9e2ba8c5af4be067f98">00741</a>         function extractLinks($content) {
<a name="l00742"></a>00742 
<a name="l00743"></a>00743                         <span class="comment">// Get links:</span>
<a name="l00744"></a>00744                 $list = $this-&gt;extractHyperLinks($content);
<a name="l00745"></a>00745 
<a name="l00746"></a>00746                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['useCrawlerForExternalFiles'] &amp;&amp; <a class="code" href="classt3lib__extMgm.html#297116a3b1e17045ff193f170b8c4a29">t3lib_extMgm::isLoaded</a>('crawler'))    {
<a name="l00747"></a>00747                         $this-&gt;includeCrawlerClass();
<a name="l00748"></a>00748                         $crawler = <a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('tx_crawler_lib');
<a name="l00749"></a>00749                 }
<a name="l00750"></a>00750 
<a name="l00751"></a>00751                         <span class="comment">// Traverse links:</span>
<a name="l00752"></a>00752                 foreach($list as $linkInfo)     {
<a name="l00753"></a>00753 
<a name="l00754"></a>00754                                 <span class="comment">// Decode entities:</span>
<a name="l00755"></a>00755                         <span class="keywordflow">if</span> ($linkInfo['localPath'])     {       <span class="comment">// localPath means: This file is sent by a download script. While the indexed URL has to point to $linkInfo['href'], the absolute path to the file is specified here!</span>
<a name="l00756"></a>00756                                 $linkSource = <a class="code" href="classt3lib__div.html#f240b4a85b87436f4eba15dd26487991">t3lib_div::htmlspecialchars_decode</a>($linkInfo['localPath']);
<a name="l00757"></a>00757                         } <span class="keywordflow">else</span> {
<a name="l00758"></a>00758                                 $linkSource = <a class="code" href="classt3lib__div.html#f240b4a85b87436f4eba15dd26487991">t3lib_div::htmlspecialchars_decode</a>($linkInfo['href']);
<a name="l00759"></a>00759                         }
<a name="l00760"></a>00760 
<a name="l00761"></a>00761                                 <span class="comment">// Parse URL:</span>
<a name="l00762"></a>00762                         $qParts = parse_url($linkSource);
<a name="l00763"></a>00763 
<a name="l00764"></a>00764                                 <span class="comment">// Check for jumpurl (TYPO3 specific thing...)</span>
<a name="l00765"></a>00765                         <span class="keywordflow">if</span> ($qParts['query'] &amp;&amp; strstr($qParts['query'],'jumpurl='))    {
<a name="l00766"></a>00766                                 parse_str($qParts['query'],$getP);
<a name="l00767"></a>00767                                 $linkSource = $getP['jumpurl'];
<a name="l00768"></a>00768                                 $qParts = parse_url($linkSource);       <span class="comment">// parse again due to new linkSource!</span>
<a name="l00769"></a>00769                         }
<a name="l00770"></a>00770 
<a name="l00771"></a>00771                         <span class="keywordflow">if</span> ($qParts['scheme'])  {
<a name="l00772"></a>00772                                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['indexExternalURLs'])  {
<a name="l00773"></a>00773                                                 <span class="comment">// Index external URL (http or otherwise)</span>
<a name="l00774"></a>00774                                         $this-&gt;indexExternalUrl($linkSource);
<a name="l00775"></a>00775                                 }
<a name="l00776"></a>00776                         } elseif (!$qParts['query']) {
<a name="l00777"></a>00777                                 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__div.html#6832637b0bc8de6c74768bbcbc868832">t3lib_div::isAllowedAbsPath</a>($linkSource))   {
<a name="l00778"></a>00778                                         $localFile = $linkSource;
<a name="l00779"></a>00779                                 } <span class="keywordflow">else</span> {
<a name="l00780"></a>00780                                         $localFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>(PATH_site.$linkSource);
<a name="l00781"></a>00781                                 }
<a name="l00782"></a>00782                                 <span class="keywordflow">if</span> ($localFile &amp;&amp; @is_file($localFile)) {
<a name="l00783"></a>00783 
<a name="l00784"></a>00784                                                 <span class="comment">// Index local file:</span>
<a name="l00785"></a>00785                                         <span class="keywordflow">if</span> ($linkInfo['localPath'])     {
<a name="l00786"></a>00786 
<a name="l00787"></a>00787                                                 $fI = pathinfo($linkSource);
<a name="l00788"></a>00788                                                 $ext = strtolower($fI['extension']);
<a name="l00789"></a>00789                                                 <span class="keywordflow">if</span> (is_object($crawler))        {
<a name="l00790"></a>00790                                                         $params = array(
<a name="l00791"></a>00791                                                                 'document' =&gt; $linkSource,
<a name="l00792"></a>00792                                                                 'alturl' =&gt; $linkInfo['href'],
<a name="l00793"></a>00793                                                                 'conf' =&gt; $this-&gt;conf
<a name="l00794"></a>00794                                                         );
<a name="l00795"></a>00795                                                         unset($params['conf']['content']);
<a name="l00796"></a>00796 
<a name="l00797"></a>00797                                                         $crawler-&gt;addQueueEntry_callBack(0,$params,'EXT:indexed_search/<span class="keyword">class</span>.crawler.php:&amp;<a class="code" href="classtx__indexedsearch__files.html">tx_indexedsearch_files</a>',$this-&gt;conf['<span class="keywordtype">id</span>']);
<a name="l00798"></a>00798                                                         $this-&gt;log_setTSlogMessage('media <span class="stringliteral">"'.$params['document'].'"</span> added to <span class="stringliteral">"crawler"</span> queue.',1);
<a name="l00799"></a>00799                                                 } <span class="keywordflow">else</span> {
<a name="l00800"></a>00800                                                         $this-&gt;indexRegularDocument($linkInfo['href'], <span class="keyword">false</span>, $linkSource, $ext);
<a name="l00801"></a>00801                                                 }
<a name="l00802"></a>00802                                         } <span class="keywordflow">else</span> {
<a name="l00803"></a>00803                                                 <span class="keywordflow">if</span> (is_object($crawler))        {
<a name="l00804"></a>00804                                                         $params = array(
<a name="l00805"></a>00805                                                                 'document' =&gt; $linkSource,
<a name="l00806"></a>00806                                                                 'conf' =&gt; $this-&gt;conf
<a name="l00807"></a>00807                                                         );
<a name="l00808"></a>00808                                                         unset($params['conf']['content']);
<a name="l00809"></a>00809                                                         $crawler-&gt;addQueueEntry_callBack(0,$params,'EXT:indexed_search/<span class="keyword">class</span>.crawler.php:&amp;<a class="code" href="classtx__indexedsearch__files.html">tx_indexedsearch_files</a>',$this-&gt;conf['<span class="keywordtype">id</span>']);
<a name="l00810"></a>00810                                                         $this-&gt;log_setTSlogMessage('media <span class="stringliteral">"'.$params['document'].'"</span> added to <span class="stringliteral">"crawler"</span> queue.',1);
<a name="l00811"></a>00811                                                 } <span class="keywordflow">else</span> {
<a name="l00812"></a>00812                                                         $this-&gt;indexRegularDocument($linkSource);
<a name="l00813"></a>00813                                                 }
<a name="l00814"></a>00814                                         }
<a name="l00815"></a>00815                                 }
<a name="l00816"></a>00816                         }
<a name="l00817"></a>00817                 }
<a name="l00818"></a>00818         }
<a name="l00819"></a>00819 
<a name="l00827"></a><a class="code" href="classtx__indexedsearch__indexer.html#35fb1310eb8ae2e0a636c53e19e3d1e7">00827</a>         function extractHyperLinks($string)     {
<a name="l00828"></a>00828                 <span class="keywordflow">if</span> (!is_object($this-&gt;htmlParser))      {
<a name="l00829"></a>00829                         $this-&gt;htmlParser = <a class="code" href="classt3lib__div.html#b47f8a8e8be44b79a0b8064dcd427bc1">t3lib_div::makeInstance</a>('t3lib_parseHtml');
<a name="l00830"></a>00830                 }
<a name="l00831"></a>00831 
<a name="l00832"></a>00832                 $parts = $this-&gt;htmlParser-&gt;splitTags(<span class="charliteral">'a'</span>,$string);
<a name="l00833"></a>00833                 $list = array();
<a name="l00834"></a>00834                 foreach ($parts as $k =&gt; $v)    {
<a name="l00835"></a>00835                         <span class="keywordflow">if</span> ($k%2)       {
<a name="l00836"></a>00836                                 $params = $this-&gt;htmlParser-&gt;get_tag_attributes($v,1);
<a name="l00837"></a>00837                                 $firstTagName = $this-&gt;htmlParser-&gt;getFirstTagName($v); <span class="comment">// The 'name' of the first tag</span>
<a name="l00838"></a>00838 
<a name="l00839"></a>00839                                 <span class="keywordflow">switch</span> (strtolower($firstTagName))      {
<a name="l00840"></a>00840                                         <span class="keywordflow">case</span> <span class="charliteral">'a'</span>:
<a name="l00841"></a>00841                                                 $src = $params[0]['href'];
<a name="l00842"></a>00842                                                 <span class="keywordflow">if</span> ($src)       {
<a name="l00843"></a>00843                                                                 <span class="comment">// Check if a local path to that file has been set - useful if you are using a download script.</span>
<a name="l00844"></a>00844                                                         $md5 = <a class="code" href="classt3lib__div.html#1e4206f70282a7cafb2c42c9fe6c1d5e">t3lib_div::shortMD5</a>($src);
<a name="l00845"></a>00845                                                         <span class="keywordflow">if</span> (is_array($indexLocalFiles=$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles']))  {
<a name="l00846"></a>00846                                                                 $localPath = isset($indexLocalFiles[$md5]) ? $indexLocalFiles[$md5] : '';
<a name="l00847"></a>00847                                                         } <span class="keywordflow">else</span> $localPath=<span class="keyword">false</span>;
<a name="l00848"></a>00848 
<a name="l00849"></a>00849                                                         $list[] = array(
<a name="l00850"></a>00850                                                                 'tag' =&gt; $v,
<a name="l00851"></a>00851                                                                 'href' =&gt; $params[0]['href'],
<a name="l00852"></a>00852                                                                 'localPath' =&gt; $localPath
<a name="l00853"></a>00853                                                         );
<a name="l00854"></a>00854                                                 }
<a name="l00855"></a>00855                                         <span class="keywordflow">break</span>;
<a name="l00856"></a>00856                                 }
<a name="l00857"></a>00857                         }
<a name="l00858"></a>00858                 }
<a name="l00859"></a>00859 
<a name="l00860"></a>00860                 <span class="keywordflow">return</span> $list;
<a name="l00861"></a>00861         }
<a name="l00862"></a>00862 
<a name="l00863"></a>00863 
<a name="l00864"></a>00864 
<a name="l00865"></a>00865 
<a name="l00866"></a>00866 
<a name="l00867"></a>00867 
<a name="l00868"></a>00868 
<a name="l00869"></a>00869 
<a name="l00870"></a>00870 
<a name="l00871"></a>00871 
<a name="l00872"></a>00872 
<a name="l00873"></a>00873         <span class="comment">/******************************************</span>
<a name="l00874"></a>00874 <span class="comment">         *</span>
<a name="l00875"></a>00875 <span class="comment">         * Indexing; external URL</span>
<a name="l00876"></a>00876 <span class="comment">         *</span>
<a name="l00877"></a>00877 <span class="comment">         ******************************************/</span>
<a name="l00878"></a>00878 
<a name="l00886"></a><a class="code" href="classtx__indexedsearch__indexer.html#b8353e9ef612e88c07e7bb4a2d94ed9c">00886</a>         function indexExternalUrl($externalUrl) {
<a name="l00887"></a>00887 
<a name="l00888"></a>00888                         <span class="comment">// Parse External URL:</span>
<a name="l00889"></a>00889                 $qParts = parse_url($externalUrl);
<a name="l00890"></a>00890                 $fI = pathinfo($qParts['path']);
<a name="l00891"></a>00891                 $ext = strtolower($fI['extension']);
<a name="l00892"></a>00892 
<a name="l00893"></a>00893                         <span class="comment">// Get headers:</span>
<a name="l00894"></a>00894                 $urlHeaders = $this-&gt;getUrlHeaders($externalUrl);
<a name="l00895"></a>00895                 <span class="keywordflow">if</span> (stristr($urlHeaders['Content-Type'],'text/html'))   {
<a name="l00896"></a>00896                         $content = $this-&gt;indexExternalUrl_content = t3lib_div::getUrl($externalUrl);
<a name="l00897"></a>00897                         <span class="keywordflow">if</span> (strlen($content))   {
<a name="l00898"></a>00898 
<a name="l00899"></a>00899                                         <span class="comment">// Create temporary file:</span>
<a name="l00900"></a>00900                                 $tmpFile = <a class="code" href="classt3lib__div.html#e126c1b5d0f72003e39c2930d5f65f07">t3lib_div::tempnam</a>('EXTERNAL_URL').'.html';
<a name="l00901"></a>00901                                 <a class="code" href="classt3lib__div.html#d6d2efcc3ac43b2bf2ebacc98d000292">t3lib_div::writeFile</a>($tmpFile, $content);
<a name="l00902"></a>00902 
<a name="l00903"></a>00903                                         <span class="comment">// Index that file:</span>
<a name="l00904"></a>00904                                 $this-&gt;indexRegularDocument($externalUrl, TRUE, $tmpFile, 'html');      <span class="comment">// Using "TRUE" for second parameter to force indexing of external URLs (mtime doesn't make sense, does it?)</span>
<a name="l00905"></a>00905                                 unlink($tmpFile);
<a name="l00906"></a>00906                         }
<a name="l00907"></a>00907                 }
<a name="l00908"></a>00908         }
<a name="l00909"></a>00909 
<a name="l00917"></a><a class="code" href="classtx__indexedsearch__indexer.html#e341c578a27870656040bf3221b6bb6c">00917</a>         function getUrlHeaders($url)    {
<a name="l00918"></a>00918                 $content = <a class="code" href="classt3lib__div.html#cc04add6fb893b25776fa3ead7cd50c8">t3lib_div::getURL</a>($url,2);   <span class="comment">// Try to get the headers only</span>
<a name="l00919"></a>00919 
<a name="l00920"></a>00920                 <span class="keywordflow">if</span> (strlen($content))   {
<a name="l00921"></a>00921                                 <span class="comment">// Compile headers:</span>
<a name="l00922"></a>00922                         $headers = <a class="code" href="classt3lib__div.html#cfb87d585b85e9b32841bde40beaa96c">t3lib_div::trimExplode</a>(chr(10),$content,1);
<a name="l00923"></a>00923                         $retVal = array();
<a name="l00924"></a>00924                         foreach($headers as $line)      {
<a name="l00925"></a>00925                                 <span class="keywordflow">if</span> (!strlen(trim($line)))       {
<a name="l00926"></a>00926                                         <span class="keywordflow">break</span>;  <span class="comment">// Stop at the first empty line (= end of header)</span>
<a name="l00927"></a>00927                                 }
<a name="l00928"></a>00928 
<a name="l00929"></a>00929                                 list($headKey, $headValue) = explode(<span class="charliteral">':'</span>, $line, 2);
<a name="l00930"></a>00930                                 $retVal[$headKey] = $headValue;
<a name="l00931"></a>00931                         }
<a name="l00932"></a>00932                         <span class="keywordflow">return</span> $retVal;
<a name="l00933"></a>00933                 }
<a name="l00934"></a>00934         }
<a name="l00935"></a>00935 
<a name="l00936"></a>00936 
<a name="l00937"></a>00937 
<a name="l00938"></a>00938 
<a name="l00939"></a>00939 
<a name="l00940"></a>00940 
<a name="l00941"></a>00941 
<a name="l00942"></a>00942 
<a name="l00943"></a>00943 
<a name="l00944"></a>00944 
<a name="l00945"></a>00945 
<a name="l00946"></a>00946 
<a name="l00947"></a>00947 
<a name="l00948"></a>00948         <span class="comment">/******************************************</span>
<a name="l00949"></a>00949 <span class="comment">         *</span>
<a name="l00950"></a>00950 <span class="comment">         * Indexing; external files (PDF, DOC, etc)</span>
<a name="l00951"></a>00951 <span class="comment">         *</span>
<a name="l00952"></a>00952 <span class="comment">         ******************************************/</span>
<a name="l00953"></a>00953 
<a name="l00963"></a><a class="code" href="classtx__indexedsearch__indexer.html#85c3bad8606fab06034b7b917d661cc2">00963</a>         function indexRegularDocument($file, $force=FALSE, $contentTmpFile='', $altExtension='')        {
<a name="l00964"></a>00964 
<a name="l00965"></a>00965                         <span class="comment">// Init</span>
<a name="l00966"></a>00966                 $fI = pathinfo($file);
<a name="l00967"></a>00967                 $ext = $altExtension ? $altExtension : strtolower($fI['extension']);
<a name="l00968"></a>00968 
<a name="l00969"></a>00969                         <span class="comment">// Create abs-path:</span>
<a name="l00970"></a>00970                 <span class="keywordflow">if</span> (!$contentTmpFile)   {
<a name="l00971"></a>00971                         <span class="keywordflow">if</span> (!<a class="code" href="classt3lib__div.html#087c8553dcfca3a0a9aedfd2affd68a0">t3lib_div::isAbsPath</a>($file))       {       <span class="comment">// Relative, prepend PATH_site:</span>
<a name="l00972"></a>00972                                 $absFile = <a class="code" href="classt3lib__div.html#ad5d27aeabb41e7f5ed6ddaf760de42a">t3lib_div::getFileAbsFileName</a>(PATH_site.$file);
<a name="l00973"></a>00973                         } <span class="keywordflow">else</span> {        <span class="comment">// Absolute, pass-through:</span>
<a name="l00974"></a>00974                                 $absFile = $file;
<a name="l00975"></a>00975                         }
<a name="l00976"></a>00976                         $absFile = <a class="code" href="classt3lib__div.html#6832637b0bc8de6c74768bbcbc868832">t3lib_div::isAllowedAbsPath</a>($absFile) ? $absFile : '';
<a name="l00977"></a>00977                 } <span class="keywordflow">else</span> {
<a name="l00978"></a>00978                         $absFile = $contentTmpFile;
<a name="l00979"></a>00979                 }
<a name="l00980"></a>00980 
<a name="l00981"></a>00981                         <span class="comment">// Indexing the document:</span>
<a name="l00982"></a>00982                 <span class="keywordflow">if</span> ($absFile &amp;&amp; @is_file($absFile))     {
<a name="l00983"></a>00983                         <span class="keywordflow">if</span> ($this-&gt;external_parsers[$ext])      {
<a name="l00984"></a>00984                                 $mtime = filemtime($absFile);
<a name="l00985"></a>00985                                 $cParts = $this-&gt;fileContentParts($ext,$absFile);
<a name="l00986"></a>00986 
<a name="l00987"></a>00987                                 foreach($cParts as $cPKey)      {
<a name="l00988"></a>00988                                         $this-&gt;internal_log = array();
<a name="l00989"></a>00989                                         $this-&gt;log_push('Index: '.str_replace(<span class="charliteral">'.'</span>,<span class="charliteral">'_'</span>,basename($file)).($cPKey?<span class="charliteral">'#'</span>.$cPKey:''),'');
<a name="l00990"></a>00990                                         $Pstart = <a class="code" href="classt3lib__div.html#4351f76abf45f1fbae8dd0075744aa3c">t3lib_div::milliseconds</a>();
<a name="l00991"></a>00991                                         $subinfo = array('key' =&gt; $cPKey);      <span class="comment">// Setting page range. This is "0" (zero) when no division is made, otherwise a range like "1-3"</span>
<a name="l00992"></a>00992                                         $phash_arr = $this-&gt;file_phash_arr = $this-&gt;setExtHashes($file,$subinfo);
<a name="l00993"></a>00993                                         $check = $this-&gt;checkMtimeTstamp($mtime, $phash_arr['phash']);
<a name="l00994"></a>00994                                         <span class="keywordflow">if</span> ($check &gt; 0 || $force)       {
<a name="l00995"></a>00995                                                 <span class="keywordflow">if</span> ($check &gt; 0) {
<a name="l00996"></a>00996                                                         $this-&gt;log_setTSlogMessage('Indexing needed, reason: '.$this-&gt;reasons[$check],1);
<a name="l00997"></a>00997                                                 } <span class="keywordflow">else</span> {
<a name="l00998"></a>00998                                                         $this-&gt;log_setTSlogMessage('Indexing forced by flag',1);
<a name="l00999"></a>00999                                                 }
<a name="l01000"></a>01000 
<a name="l01001"></a>01001                                                         <span class="comment">// Check external file counter:</span>
<a name="l01002"></a>01002                                                 <span class="keywordflow">if</span> ($this-&gt;externalFileCounter &lt; $this-&gt;maxExternalFiles || $force)     {
<a name="l01003"></a>01003 
<a name="l01004"></a>01004                                                                         <span class="comment">// Divide into title,keywords,description and body:</span>
<a name="l01005"></a>01005                                                         $this-&gt;log_push('Split content<span class="charliteral">','</span>');
<a name="l01006"></a>01006                                                                 $contentParts = $this-&gt;readFileContent($ext,$absFile,$cPKey);
<a name="l01007"></a>01007                                                         $this-&gt;log_pull();
<a name="l01008"></a>01008 
<a name="l01009"></a>01009                                                         <span class="keywordflow">if</span> (is_array($contentParts))    {
<a name="l01010"></a>01010                                                                         <span class="comment">// Calculating a hash over what is to be the actual content. (see indexTypo3PageContent())</span>
<a name="l01011"></a>01011                                                                 $content_md5h = $this-&gt;md5inthash(implode($contentParts,''));
<a name="l01012"></a>01012 
<a name="l01013"></a>01013                                                                 <span class="keywordflow">if</span> ($this-&gt;checkExternalDocContentHash($phash_arr['phash_grouping'], $content_md5h) || $force)  {
<a name="l01014"></a>01014 
<a name="l01015"></a>01015                                                                                 <span class="comment">// Increment counter:</span>
<a name="l01016"></a>01016                                                                         $this-&gt;externalFileCounter++;
<a name="l01017"></a>01017 
<a name="l01018"></a>01018                                                                                 <span class="comment">// Splitting words</span>
<a name="l01019"></a>01019                                                                         $this-&gt;log_push('Extract words from content<span class="charliteral">','</span>');
<a name="l01020"></a>01020                                                                                 $splitInWords = $this-&gt;processWordsInArrays($contentParts);
<a name="l01021"></a>01021                                                                         $this-&gt;log_pull();
<a name="l01022"></a>01022 
<a name="l01023"></a>01023                                                                                 <span class="comment">// Analyse the indexed words.</span>
<a name="l01024"></a>01024                                                                         $this-&gt;log_push('Analyse the extracted words<span class="charliteral">','</span>');
<a name="l01025"></a>01025                                                                                 $indexArr = $this-&gt;indexAnalyze($splitInWords);
<a name="l01026"></a>01026                                                                         $this-&gt;log_pull();
<a name="l01027"></a>01027 
<a name="l01028"></a>01028                                                                                 <span class="comment">// Submitting page (phash) record</span>
<a name="l01029"></a>01029                                                                         $this-&gt;log_push('Submitting page<span class="charliteral">','</span>');
<a name="l01030"></a>01030                                                                                 $size = filesize($absFile);
<a name="l01031"></a>01031                                                                                 $ctime = filemtime($absFile);   <span class="comment">// Unfortunately I cannot determine WHEN a file is originally made - so I must return the modification time...</span>
<a name="l01032"></a>01032                                                                                 $this-&gt;submitFilePage($phash_arr,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts);
<a name="l01033"></a>01033                                                                         $this-&gt;log_pull();
<a name="l01034"></a>01034 
<a name="l01035"></a>01035                                                                                 <span class="comment">// Check words and submit to word list if not there</span>
<a name="l01036"></a>01036                                                                         $this-&gt;log_push('Check word list and submit words<span class="charliteral">','</span>');
<a name="l01037"></a>01037                                                                                 $this-&gt;checkWordList($indexArr);
<a name="l01038"></a>01038                                                                                 $this-&gt;submitWords($indexArr,$phash_arr['phash']);
<a name="l01039"></a>01039                                                                         $this-&gt;log_pull();
<a name="l01040"></a>01040 
<a name="l01041"></a>01041                                                                                 <span class="comment">// Set parsetime</span>
<a name="l01042"></a>01042                                                                         $this-&gt;updateParsetime($phash_arr['phash'],<a class="code" href="classt3lib__div.html#4351f76abf45f1fbae8dd0075744aa3c">t3lib_div::milliseconds</a>()-$Pstart);
<a name="l01043"></a>01043                                                                 } <span class="keywordflow">else</span> {
<a name="l01044"></a>01044                                                                         $this-&gt;updateTstamp($phash_arr['phash'],$mtime);        <span class="comment">// Update the timestamp</span>
<a name="l01045"></a>01045                                                                         $this-&gt;log_setTSlogMessage('Indexing not needed, the contentHash, '.$content_md5h.', has not changed. Timestamp updated.');
<a name="l01046"></a>01046                                                                 }
<a name="l01047"></a>01047                                                         } <span class="keywordflow">else</span> $this-&gt;log_setTSlogMessage('Could not index file! Unsupported extension.');
<a name="l01048"></a>01048                                                 } <span class="keywordflow">else</span> $this-&gt;log_setTSlogMessage('The limit of '.$this-&gt;maxExternalFiles.' has already been exceeded, so no indexing will take place <span class="keyword">this</span> time.');
<a name="l01049"></a>01049                                         } <span class="keywordflow">else</span> $this-&gt;log_setTSlogMessage('Indexing not needed, reason: '.$this-&gt;reasons[$check]);
<a name="l01050"></a>01050 
<a name="l01051"></a>01051                                                 <span class="comment">// Checking and setting sections:</span>
<a name="l01052"></a>01052 <span class="preprocessor">                #                       $this-&gt;submitFile_grlist($phash_arr['phash']);  // Setting a gr_list record if there is none already (set for default fe_group)</span>
<a name="l01053"></a>01053 <span class="preprocessor"></span>                                        $this-&gt;submitFile_section($phash_arr['phash']);         <span class="comment">// Setting a section-record for the file. This is done also if the file is not indexed. Notice that section records are deleted when the page is indexed.</span>
<a name="l01054"></a>01054                                         $this-&gt;log_pull();
<a name="l01055"></a>01055                                 }
<a name="l01056"></a>01056                         } <span class="keywordflow">else</span> $this-&gt;log_setTSlogMessage('Indexing not possible; The extension <span class="stringliteral">"'.$ext.'"</span> was not supported.');
<a name="l01057"></a>01057                 } <span class="keywordflow">else</span> $this-&gt;log_setTSlogMessage('Indexing not possible; File <span class="stringliteral">"'.$absFile.'"</span> not found or valid.');
<a name="l01058"></a>01058         }
<a name="l01059"></a>01059 
<a name="l01069"></a><a class="code" href="classtx__indexedsearch__indexer.html#e42830384cf215fd2ddfad098785cfe9">01069</a>         function readFileContent($ext,$absFile,$cPKey)  {
<a name="l01070"></a>01070 
<a name="l01071"></a>01071                         <span class="comment">// Consult relevant external document parser:</span>
<a name="l01072"></a>01072                 <span class="keywordflow">if</span> (is_object($this-&gt;external_parsers[$ext]))   {
<a name="l01073"></a>01073                         $contentArr = $this-&gt;external_parsers[$ext]-&gt;readFileContent($ext,$absFile,$cPKey);
<a name="l01074"></a>01074                 }
<a name="l01075"></a>01075 
<a name="l01076"></a>01076                 <span class="keywordflow">return</span> $contentArr;
<a name="l01077"></a>01077         }
<a name="l01078"></a>01078 
<a name="l01086"></a><a class="code" href="classtx__indexedsearch__indexer.html#20f87f9f8dc1d26dedc61aa8333c3d37">01086</a>         function fileContentParts($ext,$absFile)        {
<a name="l01087"></a>01087                 $cParts = array(0);
<a name="l01088"></a>01088 
<a name="l01089"></a>01089                         <span class="comment">// Consult relevant external document parser:</span>
<a name="l01090"></a>01090                 <span class="keywordflow">if</span> (is_object($this-&gt;external_parsers[$ext]))   {
<a name="l01091"></a>01091                         $cParts = $this-&gt;external_parsers[$ext]-&gt;fileContentParts($ext,$absFile);
<a name="l01092"></a>01092                 }
<a name="l01093"></a>01093 
<a name="l01094"></a>01094                 <span class="keywordflow">return</span> $cParts;
<a name="l01095"></a>01095         }
<a name="l01096"></a>01096 
<a name="l01104"></a><a class="code" href="classtx__indexedsearch__indexer.html#3b93c4977e4f7e0584571019dcb5617f">01104</a>         function splitRegularContent($content) {
<a name="l01105"></a>01105                 $contentArr = $this-&gt;defaultContentArray;
<a name="l01106"></a>01106                 $contentArr['body'] = $content;
<a name="l01107"></a>01107 
<a name="l01108"></a>01108                 <span class="keywordflow">return</span> $contentArr;
<a name="l01109"></a>01109         }
<a name="l01110"></a>01110 
<a name="l01111"></a>01111 
<a name="l01112"></a>01112 
<a name="l01113"></a>01113 
<a name="l01114"></a>01114 
<a name="l01115"></a>01115 
<a name="l01116"></a>01116 
<a name="l01117"></a>01117 
<a name="l01118"></a>01118 
<a name="l01119"></a>01119 
<a name="l01120"></a>01120 
<a name="l01121"></a>01121 
<a name="l01122"></a>01122 
<a name="l01123"></a>01123 
<a name="l01124"></a>01124         <span class="comment">/**********************************</span>
<a name="l01125"></a>01125 <span class="comment">         *</span>
<a name="l01126"></a>01126 <span class="comment">         * Analysing content, Extracting words</span>
<a name="l01127"></a>01127 <span class="comment">         *</span>
<a name="l01128"></a>01128 <span class="comment">         **********************************/</span>
<a name="l01129"></a>01129 
<a name="l01137"></a><a class="code" href="classtx__indexedsearch__indexer.html#9149e698f38441a582ea2a01d6060e6b">01137</a>         function charsetEntity2utf8(&amp;$contentArr, $charset)     {
<a name="l01138"></a>01138 
<a name="l01139"></a>01139                         <span class="comment">// Convert charset if necessary</span>
<a name="l01140"></a>01140                 reset($contentArr);
<a name="l01141"></a>01141                 <span class="keywordflow">while</span>(list($key,)=each($contentArr)) {
<a name="l01142"></a>01142                         <span class="keywordflow">if</span> (strlen($contentArr[$key]))  {
<a name="l01143"></a>01143 
<a name="l01144"></a>01144                                 <span class="keywordflow">if</span> ($charset!=='utf-8') {
<a name="l01145"></a>01145                                         $contentArr[$key] = $this-&gt;csObj-&gt;utf8_encode($contentArr[$key], $charset);
<a name="l01146"></a>01146                                 }
<a name="l01147"></a>01147 
<a name="l01148"></a>01148                                         <span class="comment">// decode all numeric / html-entities in the string to real characters:</span>
<a name="l01149"></a>01149                                 $contentArr[$key] = $this-&gt;csObj-&gt;entities_to_utf8($contentArr[$key],TRUE);
<a name="l01150"></a>01150                         }
<a name="l01151"></a>01151                 }
<a name="l01152"></a>01152         }
<a name="l01153"></a>01153 
<a name="l01160"></a><a class="code" href="classtx__indexedsearch__indexer.html#ef5baf9c2be6ef1b1051bc2cfdceb2a9">01160</a>         function processWordsInArrays($contentArr)      {
<a name="l01161"></a>01161 
<a name="l01162"></a>01162                         <span class="comment">// split all parts to words</span>
<a name="l01163"></a>01163                 reset($contentArr);
<a name="l01164"></a>01164                 <span class="keywordflow">while</span>(list($key,)=each($contentArr)) {
<a name="l01165"></a>01165                         $contentArr[$key] = $this-&gt;lexerObj-&gt;split2Words($contentArr[$key]);
<a name="l01166"></a>01166                 }
<a name="l01167"></a>01167 
<a name="l01168"></a>01168                         <span class="comment">// For title, keywords, and description we don't want duplicates:</span>
<a name="l01169"></a>01169                 $contentArr['title'] = array_unique($contentArr['title']);
<a name="l01170"></a>01170                 $contentArr['keywords'] = array_unique($contentArr['keywords']);
<a name="l01171"></a>01171                 $contentArr['description'] = array_unique($contentArr['description']);
<a name="l01172"></a>01172 
<a name="l01173"></a>01173                         <span class="comment">// Return modified array:</span>
<a name="l01174"></a>01174                 <span class="keywordflow">return</span> $contentArr;
<a name="l01175"></a>01175         }
<a name="l01176"></a>01176 
<a name="l01185"></a><a class="code" href="classtx__indexedsearch__indexer.html#59cf2f5f4972b86b45f6813b2492c69c">01185</a>         function procesWordsInArrays($contentArr)       {
<a name="l01186"></a>01186                 <span class="keywordflow">return</span> $this-&gt;processWordsInArrays($contentArr);
<a name="l01187"></a>01187         }
<a name="l01188"></a>01188 
<a name="l01195"></a><a class="code" href="classtx__indexedsearch__indexer.html#405210a3a7d6130cb0536a4e2b08f98e">01195</a>         function bodyDescription($contentArr)   {
<a name="l01196"></a>01196 
<a name="l01197"></a>01197                         <span class="comment">// Setting description</span>
<a name="l01198"></a>01198                 $maxL = <a class="code" href="classt3lib__div.html#79f6a47a7658e28e3f65666f8ebc19f8">t3lib_div::intInRange</a>($this-&gt;conf['index_descrLgd'],0,255,200);
<a name="l01199"></a>01199                 <span class="keywordflow">if</span> ($maxL)      {
<a name="l01200"></a>01200                                 <span class="comment">// Takes the quadruple lenght first, because whitespace and entities may be removed and thus shorten the string more yet.</span>
<a name="l01201"></a>01201 <span class="preprocessor">        #               $bodyDescription = implode(' ',split('[[:space:],]+',substr(trim($contentArr['body']),0,$maxL*4)));</span>
<a name="l01202"></a>01202 <span class="preprocessor"></span>                        $bodyDescription = str_replace(array(<span class="charliteral">' '</span>,<span class="stringliteral">"\t"</span>,<span class="stringliteral">"\r"</span>,<span class="stringliteral">"\n"</span>),<span class="charliteral">' '</span>,$contentArr['body']);
<a name="l01203"></a>01203 
<a name="l01204"></a>01204                                 <span class="comment">// Shorten the string:</span>
<a name="l01205"></a>01205                         $bodyDescription = $this-&gt;csObj-&gt;strtrunc('utf-8', $bodyDescription, $maxL);
<a name="l01206"></a>01206                 }
<a name="l01207"></a>01207 
<a name="l01208"></a>01208                 <span class="keywordflow">return</span> $bodyDescription;
<a name="l01209"></a>01209         }
<a name="l01210"></a>01210 
<a name="l01217"></a><a class="code" href="classtx__indexedsearch__indexer.html#ecd591dc0b5a02aad38bd390982fdf6d">01217</a>         function indexAnalyze($content) {
<a name="l01218"></a>01218                 $indexArr = Array();
<a name="l01219"></a>01219                 $counter = 0;
<a name="l01220"></a>01220 
<a name="l01221"></a>01221                 $this-&gt;analyzeHeaderinfo($indexArr,$content,'title',7);
<a name="l01222"></a>01222                 $this-&gt;analyzeHeaderinfo($indexArr,$content,'keywords',6);
<a name="l01223"></a>01223                 $this-&gt;analyzeHeaderinfo($indexArr,$content,'description',5);
<a name="l01224"></a>01224                 $this-&gt;analyzeBody($indexArr,$content);
<a name="l01225"></a>01225 
<a name="l01226"></a>01226                 <span class="keywordflow">return</span> ($indexArr);
<a name="l01227"></a>01227         }
<a name="l01228"></a>01228 
<a name="l01238"></a><a class="code" href="classtx__indexedsearch__indexer.html#d091944ac977d0f211773d6ff9fd1a19">01238</a>         function analyzeHeaderinfo(&amp;$retArr,$content,$key,$offset) {
<a name="l01239"></a>01239                 reset($content[$key]);
<a name="l01240"></a>01240                 <span class="keywordflow">while</span>(list(,$val)=each($content[$key]))  {
<a name="l01241"></a>01241                         $val = substr($val,0,60);       <span class="comment">// Max 60 - because the baseword varchar IS 60. This MUST be the same.</span>
<a name="l01242"></a>01242                         $retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset);
<a name="l01243"></a>01243                         $retArr[$val]['count'] = $retArr[$val]['count']+1;
<a name="l01244"></a>01244                         $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
<a name="l01245"></a>01245                         $retArr[$val]['metaphone'] = $this-&gt;metaphone($val);
<a name="l01246"></a>01246                         $this-&gt;wordcount++;
<a name="l01247"></a>01247                 }
<a name="l01248"></a>01248         }
<a name="l01249"></a>01249 
<a name="l01257"></a><a class="code" href="classtx__indexedsearch__indexer.html#41cbd6eaed41905ae8b941eb5f5d4a6f">01257</a>         function analyzeBody(&amp;$retArr,$content) {
<a name="l01258"></a>01258                 foreach($content['body'] as $key =&gt; $val)       {
<a name="l01259"></a>01259                         $val = substr($val,0,60);       <span class="comment">// Max 60 - because the baseword varchar IS 60. This MUST be the same.</span>
<a name="l01260"></a>01260                         <span class="keywordflow">if</span>(!isset($retArr[$val])) {
<a name="l01261"></a>01261                                 $retArr[$val]['first'] = $key;
<a name="l01262"></a>01262                                 $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
<a name="l01263"></a>01263                                 $retArr[$val]['metaphone'] = $this-&gt;metaphone($val);
<a name="l01264"></a>01264                         }
<a name="l01265"></a>01265                         $retArr[$val]['count'] = $retArr[$val]['count']+1;
<a name="l01266"></a>01266                         $this-&gt;wordcount++;
<a name="l01267"></a>01267                 }
<a name="l01268"></a>01268         }
<a name="l01269"></a>01269 
<a name="l01277"></a><a class="code" href="classtx__indexedsearch__indexer.html#93b8a8ac71ee168856d1840cb6609473">01277</a>         function metaphone($word,$retRaw=FALSE) {
<a name="l01278"></a>01278 
<a name="l01279"></a>01279                 <span class="keywordflow">if</span> (is_object($this-&gt;metaphoneObj))     {
<a name="l01280"></a>01280                         $tmp = $this-&gt;metaphoneObj-&gt;metaphone($word, $this-&gt;conf['sys_language_uid']);
<a name="l01281"></a>01281                 } <span class="keywordflow">else</span> {
<a name="l01282"></a>01282                         $tmp = metaphone($word);
<a name="l01283"></a>01283                 }
<a name="l01284"></a>01284 
<a name="l01285"></a>01285                         <span class="comment">// Return raw value?</span>
<a name="l01286"></a>01286                 <span class="keywordflow">if</span> ($retRaw)    <span class="keywordflow">return</span> $tmp;
<a name="l01287"></a>01287 
<a name="l01288"></a>01288                         <span class="comment">// Otherwise create hash and return integer</span>
<a name="l01289"></a>01289                 <span class="keywordflow">if</span>($tmp=='') $ret=0; <span class="keywordflow">else</span> $ret=hexdec(substr(md5($tmp),0,7));
<a name="l01290"></a>01290                 <span class="keywordflow">return</span> $ret;
<a name="l01291"></a>01291         }
<a name="l01292"></a>01292 
<a name="l01293"></a>01293 
<a name="l01294"></a>01294 
<a name="l01295"></a>01295 
<a name="l01296"></a>01296 
<a name="l01297"></a>01297 
<a name="l01298"></a>01298 
<a name="l01299"></a>01299 
<a name="l01300"></a>01300 
<a name="l01301"></a>01301 
<a name="l01302"></a>01302 
<a name="l01303"></a>01303 
<a name="l01304"></a>01304 
<a name="l01305"></a>01305 
<a name="l01306"></a>01306 
<a name="l01307"></a>01307 
<a name="l01308"></a>01308         <span class="comment">/********************************</span>
<a name="l01309"></a>01309 <span class="comment">         *</span>
<a name="l01310"></a>01310 <span class="comment">         * SQL; TYPO3 Pages</span>
<a name="l01311"></a>01311 <span class="comment">         *</span>
<a name="l01312"></a>01312 <span class="comment">         *******************************/</span>
<a name="l01313"></a>01313 
<a name="l01319"></a><a class="code" href="classtx__indexedsearch__indexer.html#73b916e9285d639204a3c1dd6ea2deba">01319</a>         function submitPage()   {
<a name="l01320"></a>01320 
<a name="l01321"></a>01321                         <span class="comment">// Remove any current data for this phash:</span>
<a name="l01322"></a>01322                 $this-&gt;removeOldIndexedPages($this-&gt;hash['phash']);
<a name="l01323"></a>01323 
<a name="l01324"></a>01324                         <span class="comment">// setting new phash_row</span>
<a name="l01325"></a>01325                 $fields = array(
<a name="l01326"></a>01326                         'phash' =&gt; $this-&gt;hash['phash'],
<a name="l01327"></a>01327                         'phash_grouping' =&gt; $this-&gt;hash['phash_grouping'],
<a name="l01328"></a>01328                         'cHashParams' =&gt; serialize($this-&gt;cHashParams),
<a name="l01329"></a>01329                         'contentHash' =&gt; $this-&gt;content_md5h,
<a name="l01330"></a>01330                         'data_page_id' =&gt; $this-&gt;conf['<span class="keywordtype">id</span>'],
<a name="l01331"></a>01331                         'data_page_reg1' =&gt; $this-&gt;conf['page_cache_reg1'],
<a name="l01332"></a>01332                         'data_page_type' =&gt; $this-&gt;conf['type'],
<a name="l01333"></a>01333                         'data_page_mp' =&gt; $this-&gt;conf['MP'],
<a name="l01334"></a>01334                         'gr_list' =&gt; $this-&gt;conf['gr_list'],
<a name="l01335"></a>01335                         'item_type' =&gt; 0,       <span class="comment">// TYPO3 page</span>
<a name="l01336"></a>01336                         'item_title' =&gt; $this-&gt;contentParts['title'],
<a name="l01337"></a>01337                         'item_description' =&gt; $this-&gt;bodyDescription($this-&gt;contentParts),
<a name="l01338"></a>01338                         'item_mtime' =&gt; $this-&gt;conf['mtime'],
<a name="l01339"></a>01339                         'item_size' =&gt; strlen($this-&gt;conf['content']),
<a name="l01340"></a>01340                         'tstamp' =&gt; time(),
<a name="l01341"></a>01341                         'crdate' =&gt; time(),
<a name="l01342"></a>01342                         'item_crdate' =&gt; $this-&gt;conf['crdate'], <span class="comment">// Creation date of page</span>
<a name="l01343"></a>01343                         'sys_language_uid' =&gt; $this-&gt;conf['sys_language_uid'],  <span class="comment">// Sys language uid of the page. Should reflect which language it DOES actually display!</span>
<a name="l01344"></a>01344                         'externalUrl' =&gt; 0,
<a name="l01345"></a>01345                         'recordUid' =&gt; intval($this-&gt;conf['recordUid']),
<a name="l01346"></a>01346                         'freeIndexUid' =&gt; intval($this-&gt;conf['freeIndexUid']),
<a name="l01347"></a>01347                         'freeIndexSetId' =&gt; intval($this-&gt;conf['freeIndexSetId']),
<a name="l01348"></a>01348                 );
<a name="l01349"></a>01349 
<a name="l01350"></a>01350                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_phash', $fields);
<a name="l01351"></a>01351 
<a name="l01352"></a>01352                         <span class="comment">// PROCESSING index_section</span>
<a name="l01353"></a>01353                 $this-&gt;submit_section($this-&gt;hash['phash'],$this-&gt;hash['phash']);
<a name="l01354"></a>01354 
<a name="l01355"></a>01355                         <span class="comment">// PROCESSING index_grlist</span>
<a name="l01356"></a>01356                 $this-&gt;submit_grlist($this-&gt;hash['phash'],$this-&gt;hash['phash']);
<a name="l01357"></a>01357 
<a name="l01358"></a>01358                         <span class="comment">// PROCESSING index_fulltext</span>
<a name="l01359"></a>01359                 $fields = array(
<a name="l01360"></a>01360                         'phash' =&gt; $this-&gt;hash['phash'],
<a name="l01361"></a>01361                         'fulltextdata' =&gt; implode(<span class="charliteral">' '</span>, $this-&gt;contentParts)
<a name="l01362"></a>01362                 );
<a name="l01363"></a>01363                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['fullTextDataLength']&gt;0)       {
<a name="l01364"></a>01364                         $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this-&gt;indexerConfig['fullTextDataLength']);
<a name="l01365"></a>01365                 }
<a name="l01366"></a>01366                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_fulltext', $fields);
<a name="l01367"></a>01367 
<a name="l01368"></a>01368                         <span class="comment">// PROCESSING index_debug</span>
<a name="l01369"></a>01369                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['debugMode'])  {
<a name="l01370"></a>01370                         $fields = array(
<a name="l01371"></a>01371                                 'phash' =&gt; $this-&gt;hash['phash'],
<a name="l01372"></a>01372                                 'debuginfo' =&gt; serialize(array(
<a name="l01373"></a>01373                                                 'cHashParams' =&gt; $this-&gt;cHashParams,
<a name="l01374"></a>01374                                                 'external_parsers initialized' =&gt; array_keys($this-&gt;external_parsers),
<a name="l01375"></a>01375                                                 'conf' =&gt; array_merge($this-&gt;conf,array('content'=&gt;substr($this-&gt;conf['content'],0,1000))),
<a name="l01376"></a>01376                                                 'contentParts' =&gt; array_merge($this-&gt;contentParts,array('body' =&gt; substr($this-&gt;contentParts['body'],0,1000))),
<a name="l01377"></a>01377                                                 'logs' =&gt; $this-&gt;internal_log,
<a name="l01378"></a>01378                                                 'lexer' =&gt; $this-&gt;lexerObj-&gt;debugString,
<a name="l01379"></a>01379                                         ))
<a name="l01380"></a>01380                         );
<a name="l01381"></a>01381                         $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_debug', $fields);
<a name="l01382"></a>01382                 }
<a name="l01383"></a>01383         }
<a name="l01384"></a>01384 
<a name="l01393"></a><a class="code" href="classtx__indexedsearch__indexer.html#e48bf91006e2fece9e88d0460fc5bba4">01393</a>         function submit_grlist($hash,$phash_x)  {
<a name="l01394"></a>01394 
<a name="l01395"></a>01395                         <span class="comment">// Setting the gr_list record</span>
<a name="l01396"></a>01396                 $fields = array(
<a name="l01397"></a>01397                         'phash' =&gt; $hash,
<a name="l01398"></a>01398                         'phash_x' =&gt; $phash_x,
<a name="l01399"></a>01399                         'hash_gr_list' =&gt; $this-&gt;md5inthash($this-&gt;conf['gr_list']),
<a name="l01400"></a>01400                         'gr_list' =&gt; $this-&gt;conf['gr_list']
<a name="l01401"></a>01401                 );
<a name="l01402"></a>01402                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_grlist', $fields);
<a name="l01403"></a>01403         }
<a name="l01404"></a>01404 
<a name="l01413"></a><a class="code" href="classtx__indexedsearch__indexer.html#77d77bbc681f836b92cc7f3504442e3f">01413</a>         function submit_section($hash,$hash_t3) {
<a name="l01414"></a>01414                 $fields = array(
<a name="l01415"></a>01415                         'phash' =&gt; $hash,
<a name="l01416"></a>01416                         'phash_t3' =&gt; $hash_t3,
<a name="l01417"></a>01417                         'page_id' =&gt; intval($this-&gt;conf['<span class="keywordtype">id</span>'])
<a name="l01418"></a>01418                 );
<a name="l01419"></a>01419 
<a name="l01420"></a>01420                 $this-&gt;getRootLineFields($fields);
<a name="l01421"></a>01421 
<a name="l01422"></a>01422                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_section', $fields);
<a name="l01423"></a>01423         }
<a name="l01424"></a>01424 
<a name="l01431"></a><a class="code" href="classtx__indexedsearch__indexer.html#8fc69807bd765f8ffb5816e900aa823c">01431</a>         function removeOldIndexedPages($phash)  {
<a name="l01432"></a>01432                         <span class="comment">// Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.</span>
<a name="l01433"></a>01433                 $tableArr = explode(<span class="charliteral">','</span>,'index_phash,index_section,index_grlist,index_fulltext,index_debug');
<a name="l01434"></a>01434                 foreach($tableArr as $table)    {
<a name="l01435"></a>01435                         $GLOBALS['TYPO3_DB']-&gt;exec_DELETEquery($table, 'phash='.intval($phash));
<a name="l01436"></a>01436                 }
<a name="l01437"></a>01437                         <span class="comment">// Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).</span>
<a name="l01438"></a>01438                 $GLOBALS['TYPO3_DB']-&gt;exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
<a name="l01439"></a>01439         }
<a name="l01440"></a>01440 
<a name="l01441"></a>01441 
<a name="l01442"></a>01442 
<a name="l01443"></a>01443 
<a name="l01444"></a>01444 
<a name="l01445"></a>01445 
<a name="l01446"></a>01446 
<a name="l01447"></a>01447 
<a name="l01448"></a>01448 
<a name="l01449"></a>01449 
<a name="l01450"></a>01450 
<a name="l01451"></a>01451 
<a name="l01452"></a>01452 
<a name="l01453"></a>01453         <span class="comment">/********************************</span>
<a name="l01454"></a>01454 <span class="comment">         *</span>
<a name="l01455"></a>01455 <span class="comment">         * SQL; External media</span>
<a name="l01456"></a>01456 <span class="comment">         *</span>
<a name="l01457"></a>01457 <span class="comment">         *******************************/</span>
<a name="l01458"></a>01458 
<a name="l01459"></a>01459 
<a name="l01474"></a><a class="code" href="classtx__indexedsearch__indexer.html#c0367767b6eb7318e09705da6cbd9109">01474</a>         function submitFilePage($hash,$file,$subinfo,$ext,$mtime,$ctime,$size,$content_md5h,$contentParts)      {
<a name="l01475"></a>01475 
<a name="l01476"></a>01476                         <span class="comment">// Find item Type:</span>
<a name="l01477"></a>01477                 $storeItemType = $this-&gt;external_parsers[$ext]-&gt;ext2itemtype_map[$ext];
<a name="l01478"></a>01478                 $storeItemType = $storeItemType ? $storeItemType : $ext;
<a name="l01479"></a>01479 
<a name="l01480"></a>01480                         <span class="comment">// Remove any current data for this phash:</span>
<a name="l01481"></a>01481                 $this-&gt;removeOldIndexedFiles($hash['phash']);
<a name="l01482"></a>01482 
<a name="l01483"></a>01483                         <span class="comment">// Split filename:</span>
<a name="l01484"></a>01484                 $fileParts = parse_url($file);
<a name="l01485"></a>01485 
<a name="l01486"></a>01486                         <span class="comment">// Setting new</span>
<a name="l01487"></a>01487                 $fields = array(
<a name="l01488"></a>01488                         'phash' =&gt; $hash['phash'],
<a name="l01489"></a>01489                         'phash_grouping' =&gt; $hash['phash_grouping'],
<a name="l01490"></a>01490                         'cHashParams' =&gt; serialize($subinfo),
<a name="l01491"></a>01491                         'contentHash' =&gt; $content_md5h,
<a name="l01492"></a>01492                         'data_filename' =&gt; $file,
<a name="l01493"></a>01493                         'item_type' =&gt; $storeItemType,
<a name="l01494"></a>01494                         'item_title' =&gt; trim($contentParts['title']) ? $contentParts['title'] : basename($file),
<a name="l01495"></a>01495                         'item_description' =&gt; $this-&gt;bodyDescription($contentParts),
<a name="l01496"></a>01496                         'item_mtime' =&gt; $mtime,
<a name="l01497"></a>01497                         'item_size' =&gt; $size,
<a name="l01498"></a>01498                         'item_crdate' =&gt; $ctime,
<a name="l01499"></a>01499                         'tstamp' =&gt; time(),
<a name="l01500"></a>01500                         'crdate' =&gt; time(),
<a name="l01501"></a>01501                         'gr_list' =&gt; $this-&gt;conf['gr_list'],
<a name="l01502"></a>01502                         'externalUrl' =&gt; $fileParts['scheme'] ? 1 : 0,
<a name="l01503"></a>01503                         'recordUid' =&gt; intval($this-&gt;conf['recordUid']),
<a name="l01504"></a>01504                         'freeIndexUid' =&gt; intval($this-&gt;conf['freeIndexUid']),
<a name="l01505"></a>01505                         'freeIndexSetId' =&gt; intval($this-&gt;conf['freeIndexSetId']),
<a name="l01506"></a>01506                 );
<a name="l01507"></a>01507                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_phash', $fields);
<a name="l01508"></a>01508 
<a name="l01509"></a>01509                         <span class="comment">// PROCESSING index_fulltext</span>
<a name="l01510"></a>01510                 $fields = array(
<a name="l01511"></a>01511                         'phash' =&gt; $hash['phash'],
<a name="l01512"></a>01512                         'fulltextdata' =&gt; implode(<span class="charliteral">' '</span>, $contentParts)
<a name="l01513"></a>01513                 );
<a name="l01514"></a>01514                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['fullTextDataLength']&gt;0)       {
<a name="l01515"></a>01515                         $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this-&gt;indexerConfig['fullTextDataLength']);
<a name="l01516"></a>01516                 }
<a name="l01517"></a>01517                 $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_fulltext', $fields);
<a name="l01518"></a>01518 
<a name="l01519"></a>01519                         <span class="comment">// PROCESSING index_debug</span>
<a name="l01520"></a>01520                 <span class="keywordflow">if</span> ($this-&gt;indexerConfig['debugMode'])  {
<a name="l01521"></a>01521                         $fields = array(
<a name="l01522"></a>01522                                 'phash' =&gt; $hash['phash'],
<a name="l01523"></a>01523                                 'debuginfo' =&gt; serialize(array(
<a name="l01524"></a>01524                                                 'cHashParams' =&gt; $subinfo,
<a name="l01525"></a>01525                                                 'contentParts' =&gt; array_merge($contentParts,array('body' =&gt; substr($contentParts['body'],0,1000))),
<a name="l01526"></a>01526                                                 'logs' =&gt; $this-&gt;internal_log,
<a name="l01527"></a>01527                                                 'lexer' =&gt; $this-&gt;lexerObj-&gt;debugString,
<a name="l01528"></a>01528                                         ))
<a name="l01529"></a>01529                         );
<a name="l01530"></a>01530                         $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_debug', $fields);
<a name="l01531"></a>01531                 }
<a name="l01532"></a>01532         }
<a name="l01533"></a>01533 
<a name="l01540"></a><a class="code" href="classtx__indexedsearch__indexer.html#55ddd0d7808664b5afecca8a70d71047">01540</a>         function submitFile_grlist($hash)       {
<a name="l01541"></a>01541                         <span class="comment">// Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one.</span>
<a name="l01542"></a>01542                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this-&gt;md5inthash($this-&gt;defaultGrList).' OR hash_gr_list='.$this-&gt;md5inthash($this-&gt;conf['gr_list']).<span class="charliteral">')'</span>);
<a name="l01543"></a>01543                 <span class="keywordflow">if</span> (!$GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res))  {
<a name="l01544"></a>01544                         $this-&gt;submit_grlist($hash,$hash);
<a name="l01545"></a>01545                 }
<a name="l01546"></a>01546         }
<a name="l01547"></a>01547 
<a name="l01554"></a><a class="code" href="classtx__indexedsearch__indexer.html#be3c19c3de2d48dc7e757a5be7ce00df">01554</a>         function submitFile_section($hash)      {
<a name="l01555"></a>01555                         <span class="comment">// Testing if there is a section</span>
<a name="l01556"></a>01556                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this-&gt;conf['<span class="keywordtype">id</span>']));
<a name="l01557"></a>01557                 <span class="keywordflow">if</span> (!$GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res))  {
<a name="l01558"></a>01558                         $this-&gt;submit_section($hash,$this-&gt;hash['phash']);
<a name="l01559"></a>01559                 }
<a name="l01560"></a>01560         }
<a name="l01561"></a>01561 
<a name="l01568"></a><a class="code" href="classtx__indexedsearch__indexer.html#42c579e28a7cec9bd2c36f1b9cfe377f">01568</a>         function removeOldIndexedFiles($phash)  {
<a name="l01569"></a>01569 
<a name="l01570"></a>01570                         <span class="comment">// Removing old registrations for tables.</span>
<a name="l01571"></a>01571                 $tableArr = explode(<span class="charliteral">','</span>,'index_phash,index_grlist,index_fulltext,index_debug');
<a name="l01572"></a>01572                 foreach($tableArr as $table)    {
<a name="l01573"></a>01573                         $GLOBALS['TYPO3_DB']-&gt;exec_DELETEquery($table, 'phash='.intval($phash));
<a name="l01574"></a>01574                 }
<a name="l01575"></a>01575         }
<a name="l01576"></a>01576 
<a name="l01577"></a>01577 
<a name="l01578"></a>01578 
<a name="l01579"></a>01579 
<a name="l01580"></a>01580 
<a name="l01581"></a>01581 
<a name="l01582"></a>01582 
<a name="l01583"></a>01583 
<a name="l01584"></a>01584 
<a name="l01585"></a>01585 
<a name="l01586"></a>01586 
<a name="l01587"></a>01587 
<a name="l01588"></a>01588 
<a name="l01589"></a>01589 
<a name="l01590"></a>01590         <span class="comment">/********************************</span>
<a name="l01591"></a>01591 <span class="comment">         *</span>
<a name="l01592"></a>01592 <span class="comment">         * SQL Helper functions</span>
<a name="l01593"></a>01593 <span class="comment">         *</span>
<a name="l01594"></a>01594 <span class="comment">         *******************************/</span>
<a name="l01595"></a>01595 
<a name="l01604"></a><a class="code" href="classtx__indexedsearch__indexer.html#907f6e3af30a437ac65b03875082dbc3">01604</a>         function checkMtimeTstamp($mtime,$phash)        {
<a name="l01605"></a>01605 
<a name="l01606"></a>01606                         <span class="comment">// Select indexed page:</span>
<a name="l01607"></a>01607                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
<a name="l01608"></a>01608                 $out = 0;
<a name="l01609"></a>01609 
<a name="l01610"></a>01610                         <span class="comment">// If there was an indexing of the page...:</span>
<a name="l01611"></a>01611                 <span class="keywordflow">if</span> ($row = $GLOBALS['TYPO3_DB']-&gt;sql_fetch_assoc($res)) {
<a name="l01612"></a>01612                         <span class="keywordflow">if</span> ($this-&gt;tstamp_maxAge &amp;&amp; ($row['tstamp']+$this-&gt;tstamp_maxAge) &lt; time())     {               <span class="comment">// If max age is exceeded, index the page</span>
<a name="l01613"></a>01613                                 $out = 1;               <span class="comment">// The configured max-age was exceeded for the document and thus it's indexed.</span>
<a name="l01614"></a>01614                         } <span class="keywordflow">else</span> {
<a name="l01615"></a>01615                                 <span class="keywordflow">if</span> (!$this-&gt;tstamp_minAge || ($row['tstamp']+$this-&gt;tstamp_minAge)&lt;time())      {       <span class="comment">// if minAge is not set or if minAge is exceeded, look at mtime</span>
<a name="l01616"></a>01616                                         <span class="keywordflow">if</span> ($mtime)     {               <span class="comment">// If mtime is set, then it's tested. If not, the page must clearly be indexed.</span>
<a name="l01617"></a>01617                                                 <span class="keywordflow">if</span> ($row['item_mtime'] != $mtime)       {       <span class="comment">// And if mtime is different from the index_phash mtime, it's about time to re-index.</span>
<a name="l01618"></a>01618                                                         $out = 2;               <span class="comment">// The minimum age was exceeded and mtime was set and the mtime was different, so the page was indexed.</span>
<a name="l01619"></a>01619                                                 } <span class="keywordflow">else</span> {
<a name="l01620"></a>01620                                                         $out = -1;              <span class="comment">// mtime matched the document, so no changes detected and no content updated</span>
<a name="l01621"></a>01621                                                         <span class="keywordflow">if</span> ($this-&gt;tstamp_maxAge)       {
<a name="l01622"></a>01622                                                                 $this-&gt;log_setTSlogMessage('Mtime matched, timestamp NOT updated because a maxAge is set ('.($row['tstamp'] + $this-&gt;tstamp_maxAge - time()).' seconds to expire time).',1);
<a name="l01623"></a>01623                                                         } <span class="keywordflow">else</span> {
<a name="l01624"></a>01624                                                                 $this-&gt;updateTstamp($phash);    <span class="comment">// Update the timestamp</span>
<a name="l01625"></a>01625                                                                 $this-&gt;log_setTSlogMessage('Mtime matched, timestamp updated.',1);
<a name="l01626"></a>01626                                                         }
<a name="l01627"></a>01627                                                 }
<a name="l01628"></a>01628                                         } <span class="keywordflow">else</span> {$out = 3;       }       <span class="comment">// The minimum age was exceeded, but mtime was not set, so the page was indexed.</span>
<a name="l01629"></a>01629                                 } <span class="keywordflow">else</span> {$out = -2;}                     <span class="comment">// The minimum age was not exceeded</span>
<a name="l01630"></a>01630                         }
<a name="l01631"></a>01631                 } <span class="keywordflow">else</span> {$out = 4;}      <span class="comment">// Page has never been indexed (is not represented in the index_phash table).</span>
<a name="l01632"></a>01632                 <span class="keywordflow">return</span> $out;
<a name="l01633"></a>01633         }
<a name="l01634"></a>01634 
<a name="l01640"></a><a class="code" href="classtx__indexedsearch__indexer.html#63201744025e2e50dfde029c5a9c8bb8">01640</a>         function checkContentHash()     {
<a name="l01641"></a>01641                         <span class="comment">// With this query the page will only be indexed if its content is different from the same "phash_grouping" -page.</span>
<a name="l01642"></a>01642                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this-&gt;hash['phash_grouping']).' AND A.contentHash='.intval($this-&gt;content_md5h));
<a name="l01643"></a>01643                 <span class="keywordflow">if</span> ($row = $GLOBALS['TYPO3_DB']-&gt;sql_fetch_assoc($res)) {
<a name="l01644"></a>01644                         <span class="keywordflow">return</span> $row;
<a name="l01645"></a>01645                 }
<a name="l01646"></a>01646                 <span class="keywordflow">return</span> 1;
<a name="l01647"></a>01647         }
<a name="l01648"></a>01648 
<a name="l01657"></a><a class="code" href="classtx__indexedsearch__indexer.html#0f17409c9855c00f60fb186fc1e0216d">01657</a>         function checkExternalDocContentHash($hashGr,$content_md5h)     {
<a name="l01658"></a>01658                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery(<span class="charliteral">'*'</span>, 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h));
<a name="l01659"></a>01659                 <span class="keywordflow">if</span> ($row = $GLOBALS['TYPO3_DB']-&gt;sql_fetch_assoc($res)) {
<a name="l01660"></a>01660                         <span class="keywordflow">return</span> 0;
<a name="l01661"></a>01661                 }
<a name="l01662"></a>01662                 <span class="keywordflow">return</span> 1;
<a name="l01663"></a>01663         }
<a name="l01664"></a>01664 
<a name="l01671"></a><a class="code" href="classtx__indexedsearch__indexer.html#c9f4be79a1cd07065d9310e938021bb6">01671</a>         function is_grlist_set($phash_x)        {
<a name="l01672"></a>01672                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
<a name="l01673"></a>01673                 <span class="keywordflow">return</span> $GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res);
<a name="l01674"></a>01674         }
<a name="l01675"></a>01675 
<a name="l01684"></a><a class="code" href="classtx__indexedsearch__indexer.html#ffbff13f630a15533bc87dde2f37dd81">01684</a>         function update_grlist($phash,$phash_x) {
<a name="l01685"></a>01685                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this-&gt;md5inthash($this-&gt;conf['gr_list']));
<a name="l01686"></a>01686                 <span class="keywordflow">if</span> (!$GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res))  {
<a name="l01687"></a>01687                         $this-&gt;submit_grlist($phash,$phash_x);
<a name="l01688"></a>01688                         $this-&gt;log_setTSlogMessage(<span class="stringliteral">"Inserted gr_list '"</span>.$this-&gt;conf['gr_list'].<span class="stringliteral">"' for phash '"</span>.$phash.<span class="stringliteral">"'"</span>,1);
<a name="l01689"></a>01689                 }
<a name="l01690"></a>01690         }
<a name="l01691"></a>01691 
<a name="l01699"></a><a class="code" href="classtx__indexedsearch__indexer.html#ccf2c8b27a41013ae45c46a5d8991bb6">01699</a>         function updateTstamp($phash,$mtime=0)  {
<a name="l01700"></a>01700                 $updateFields = array(
<a name="l01701"></a>01701                         'tstamp' =&gt; time()
<a name="l01702"></a>01702                 );
<a name="l01703"></a>01703                 <span class="keywordflow">if</span> ($mtime)     { $updateFields['item_mtime'] = intval($mtime); }
<a name="l01704"></a>01704 
<a name="l01705"></a>01705                 $GLOBALS['TYPO3_DB']-&gt;exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
<a name="l01706"></a>01706         }
<a name="l01707"></a>01707 
<a name="l01714"></a><a class="code" href="classtx__indexedsearch__indexer.html#e4c93125d3b2259a59116538c11ab573">01714</a>         function updateSetId($phash)    {
<a name="l01715"></a>01715                 $updateFields = array(
<a name="l01716"></a>01716                         'freeIndexSetId' =&gt; intval($this-&gt;conf['freeIndexSetId'])
<a name="l01717"></a>01717                 );
<a name="l01718"></a>01718 
<a name="l01719"></a>01719                 $GLOBALS['TYPO3_DB']-&gt;exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
<a name="l01720"></a>01720         }
<a name="l01721"></a>01721 
<a name="l01729"></a><a class="code" href="classtx__indexedsearch__indexer.html#135006b58277d3b746e8beb0b405673d">01729</a>         function updateParsetime($phash,$parsetime)     {
<a name="l01730"></a>01730                 $updateFields = array(
<a name="l01731"></a>01731                         'parsetime' =&gt; intval($parsetime)
<a name="l01732"></a>01732                 );
<a name="l01733"></a>01733 
<a name="l01734"></a>01734                 $GLOBALS['TYPO3_DB']-&gt;exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
<a name="l01735"></a>01735         }
<a name="l01736"></a>01736 
<a name="l01742"></a><a class="code" href="classtx__indexedsearch__indexer.html#efcb7dac883028cce374d4e6fcacbad0">01742</a>         function updateRootline()       {
<a name="l01743"></a>01743 
<a name="l01744"></a>01744                 $updateFields = array();
<a name="l01745"></a>01745                 $this-&gt;getRootLineFields($updateFields);
<a name="l01746"></a>01746 
<a name="l01747"></a>01747                 $GLOBALS['TYPO3_DB']-&gt;exec_UPDATEquery('index_section', 'page_id='.intval($this-&gt;conf['<span class="keywordtype">id</span>']), $updateFields);
<a name="l01748"></a>01748         }
<a name="l01749"></a>01749 
<a name="l01757"></a><a class="code" href="classtx__indexedsearch__indexer.html#cab7a8360cd4b2fdef738ad8a2095447">01757</a>         function getRootLineFields(&amp;$fieldArr)  {
<a name="l01758"></a>01758 
<a name="l01759"></a>01759                 $fieldArr['rl0'] = intval($this-&gt;conf['rootline_uids'][0]);
<a name="l01760"></a>01760                 $fieldArr['rl1'] = intval($this-&gt;conf['rootline_uids'][1]);
<a name="l01761"></a>01761                 $fieldArr['rl2'] = intval($this-&gt;conf['rootline_uids'][2]);
<a name="l01762"></a>01762 
<a name="l01763"></a>01763                 <span class="keywordflow">if</span> (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields']))    {
<a name="l01764"></a>01764                         foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName =&gt; $rootLineLevel)  {
<a name="l01765"></a>01765                                 $fieldArr[$fieldName] = intval($this-&gt;conf['rootline_uids'][$rootLineLevel]);
<a name="l01766"></a>01766                         }
<a name="l01767"></a>01767                 }
<a name="l01768"></a>01768         }
<a name="l01769"></a>01769 
<a name="l01776"></a><a class="code" href="classtx__indexedsearch__indexer.html#0adbd1f72b63a5bb89282beaf27ee269">01776</a>         function removeLoginpagesWithContentHash()      {
<a name="l01777"></a>01777                 $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery(<span class="charliteral">'*'</span>, 'index_phash A,index_grlist B', '
<a name="l01778"></a>01778                                         A.phash=B.phash
<a name="l01779"></a>01779                                         AND A.phash_grouping='.intval($this-&gt;hash['phash_grouping']).'
<a name="l01780"></a>01780                                         AND B.hash_gr_list!='.$this-&gt;md5inthash($this-&gt;defaultGrList).'
<a name="l01781"></a>01781                                         AND A.contentHash='.intval($this-&gt;content_md5h));
<a name="l01782"></a>01782                 <span class="keywordflow">while</span>($row = $GLOBALS['TYPO3_DB']-&gt;sql_fetch_assoc($res))       {
<a name="l01783"></a>01783                         $this-&gt;log_setTSlogMessage(<span class="stringliteral">"The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='"</span>.$row['phash'].<span class="stringliteral">"' are now removed."</span>,1);
<a name="l01784"></a>01784                         $this-&gt;removeOldIndexedPages($row['phash']);
<a name="l01785"></a>01785                 }
<a name="l01786"></a>01786         }
<a name="l01787"></a>01787 
<a name="l01793"></a><a class="code" href="classtx__indexedsearch__indexer.html#2c288570c04d648564f076f05a5c2c72">01793</a>         function includeCrawlerClass()  {
<a name="l01794"></a>01794                 global $TYPO3_CONF_VARS;
<a name="l01795"></a>01795 
<a name="l01796"></a>01796                 require_once(<a class="code" href="classt3lib__extMgm.html#f395b211d97293002bbf92c3802392f5">t3lib_extMgm::extPath</a>('crawler').'<span class="keyword">class</span>.tx_crawler_lib.php');
<a name="l01797"></a>01797         }
<a name="l01798"></a>01798 
<a name="l01799"></a>01799 
<a name="l01800"></a>01800 
<a name="l01801"></a>01801 
<a name="l01802"></a>01802 
<a name="l01803"></a>01803 
<a name="l01804"></a>01804 
<a name="l01805"></a>01805 
<a name="l01806"></a>01806 
<a name="l01807"></a>01807 
<a name="l01808"></a>01808         <span class="comment">/********************************</span>
<a name="l01809"></a>01809 <span class="comment">         *</span>
<a name="l01810"></a>01810 <span class="comment">         * SQL; Submitting words</span>
<a name="l01811"></a>01811 <span class="comment">         *</span>
<a name="l01812"></a>01812 <span class="comment">         *******************************/</span>
<a name="l01813"></a>01813 
<a name="l01820"></a><a class="code" href="classtx__indexedsearch__indexer.html#0583b9fa452dc814b6be43f5c21e5d80">01820</a>         function checkWordList($wl) {
<a name="l01821"></a>01821                 reset($wl);
<a name="l01822"></a>01822                 $phashArr = array();
<a name="l01823"></a>01823                 <span class="keywordflow">while</span>(list($key,) = each($wl)) {
<a name="l01824"></a>01824                         $phashArr[] = $wl[$key]['hash'];
<a name="l01825"></a>01825                 }
<a name="l01826"></a>01826                 <span class="keywordflow">if</span> (count($phashArr))   {
<a name="l01827"></a>01827                         $cwl = implode(<span class="charliteral">','</span>,$phashArr);
<a name="l01828"></a>01828                         $res = $GLOBALS['TYPO3_DB']-&gt;exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.<span class="charliteral">')'</span>);
<a name="l01829"></a>01829 
<a name="l01830"></a>01830                         <span class="keywordflow">if</span>($GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res)!=count($wl)) {
<a name="l01831"></a>01831                                 $this-&gt;log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']-&gt;sql_num_rows($res)),1);
<a name="l01832"></a>01832                                 <span class="keywordflow">while</span>($row = $GLOBALS['TYPO3_DB']-&gt;sql_fetch_assoc($res)) {
<a name="l01833"></a>01833                                         unset($wl[$row['baseword']]);
<a name="l01834"></a>01834                                 }
<a name="l01835"></a>01835 
<a name="l01836"></a>01836                                 reset($wl);
<a name="l01837"></a>01837                                 <span class="keywordflow">while</span>(list($key,$val)=each($wl)) {
<a name="l01838"></a>01838                                         $insertFields = array(
<a name="l01839"></a>01839                                                 'wid' =&gt; $val['hash'],
<a name="l01840"></a>01840                                                 'baseword' =&gt; $key,
<a name="l01841"></a>01841                                                 'metaphone' =&gt; $val['metaphone']
<a name="l01842"></a>01842                                         );
<a name="l01843"></a>01843                                                 <span class="comment">// A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer than 60 chars (the baseword varchar is 60 characters...) this is not a problem.</span>
<a name="l01844"></a>01844                                         $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_words', $insertFields);
<a name="l01845"></a>01845                                 }
<a name="l01846"></a>01846                         }
<a name="l01847"></a>01847                 }
<a name="l01848"></a>01848         }
<a name="l01849"></a>01849 
<a name="l01857"></a><a class="code" href="classtx__indexedsearch__indexer.html#c0faf81393d9b7ecdc7bdd1bf6987865">01857</a>         function submitWords($wl,$phash) {
<a name="l01858"></a>01858                 $GLOBALS['TYPO3_DB']-&gt;exec_DELETEquery('index_rel', 'phash='.intval($phash));
<a name="l01859"></a>01859 
<a name="l01860"></a>01860                 foreach($wl as $val)    {
<a name="l01861"></a>01861                         $insertFields = array(
<a name="l01862"></a>01862                                 'phash' =&gt; $phash,
<a name="l01863"></a>01863                                 'wid' =&gt; $val['hash'],
<a name="l01864"></a>01864                                 'count' =&gt; $val['count'],
<a name="l01865"></a>01865                                 'first' =&gt; $val['first'],
<a name="l01866"></a>01866                                 'freq' =&gt; $this-&gt;freqMap(($val['count']/$this-&gt;wordcount)),
<a name="l01867"></a>01867                                 'flags' =&gt; ($val['cmp'] &amp; $this-&gt;flagBitMask)
<a name="l01868"></a>01868                         );
<a name="l01869"></a>01869 
<a name="l01870"></a>01870                         $GLOBALS['TYPO3_DB']-&gt;exec_INSERTquery('index_rel', $insertFields);
<a name="l01871"></a>01871                 }
<a name="l01872"></a>01872         }
<a name="l01873"></a>01873 
<a name="l01881"></a><a class="code" href="classtx__indexedsearch__indexer.html#6986df72695e54f9d9ffe8c2d478b736">01881</a>         function freqMap($freq) {
<a name="l01882"></a>01882                 $mapFactor = $this-&gt;freqMax*100*$this-&gt;freqRange;
<a name="l01883"></a>01883                 <span class="keywordflow">if</span>($freq&lt;1) {
<a name="l01884"></a>01884                         $newFreq = $freq*$mapFactor;
<a name="l01885"></a>01885                         $newFreq = $newFreq&gt;$this-&gt;freqRange?$this-&gt;freqRange:$newFreq;
<a name="l01886"></a>01886                 } <span class="keywordflow">else</span> {
<a name="l01887"></a>01887                         $newFreq = $freq/$mapFactor;
<a name="l01888"></a>01888                 }
<a name="l01889"></a>01889                 <span class="keywordflow">return</span> $newFreq;
<a name="l01890"></a>01890 
<a name="l01891"></a>01891         }
<a name="l01892"></a>01892 
<a name="l01893"></a>01893 
<a name="l01894"></a>01894 
<a name="l01895"></a>01895 
<a name="l01896"></a>01896 
<a name="l01897"></a>01897 
<a name="l01898"></a>01898 
<a name="l01899"></a>01899 
<a name="l01900"></a>01900 
<a name="l01901"></a>01901 
<a name="l01902"></a>01902 
<a name="l01903"></a>01903         <span class="comment">/********************************</span>
<a name="l01904"></a>01904 <span class="comment">         *</span>
<a name="l01905"></a>01905 <span class="comment">         * Hashing</span>
<a name="l01906"></a>01906 <span class="comment">         *</span>
<a name="l01907"></a>01907 <span class="comment">         *******************************/</span>
<a name="l01908"></a>01908 
<a name="l01914"></a><a class="code" href="classtx__indexedsearch__indexer.html#ba29b8cb5f0860797374b291a863b673">01914</a>         function setT3Hashes()  {
<a name="l01915"></a>01915 
<a name="l01916"></a>01916                         <span class="comment">//  Set main array:</span>
<a name="l01917"></a>01917                 $hArray = array(
<a name="l01918"></a>01918                         '<span class="keywordtype">id</span>' =&gt; (integer)$this-&gt;conf['<span class="keywordtype">id</span>'],
<a name="l01919"></a>01919                         'type' =&gt; (integer)$this-&gt;conf['type'],
<a name="l01920"></a>01920                         'sys_lang' =&gt; (integer)$this-&gt;conf['sys_language_uid'],
<a name="l01921"></a>01921                         'MP' =&gt; (string)$this-&gt;conf['MP'],
<a name="l01922"></a>01922                         'cHash' =&gt; $this-&gt;cHashParams
<a name="l01923"></a>01923                 );
<a name="l01924"></a>01924 
<a name="l01925"></a>01925                         <span class="comment">// Set grouping hash (Identifies a "page" combined of id, type, language, mountpoint and cHash parameters):</span>
<a name="l01926"></a>01926                 $this-&gt;hash['phash_grouping'] = $this-&gt;md5inthash(serialize($hArray));
<a name="l01927"></a>01927 
<a name="l01928"></a>01928                         <span class="comment">// Add gr_list and set plain phash (Subdivision where special page composition based on login is taken into account as well. It is expected that such pages are normally similar regardless of the login.)</span>
<a name="l01929"></a>01929                 $hArray['gr_list'] = (string)$this-&gt;conf['gr_list'];
<a name="l01930"></a>01930                 $this-&gt;hash['phash'] = $this-&gt;md5inthash(serialize($hArray));
<a name="l01931"></a>01931         }
<a name="l01932"></a>01932 
<a name="l01940"></a><a class="code" href="classtx__indexedsearch__indexer.html#a234a9689acbfbf1da00062a3bd417e5">01940</a>         function setExtHashes($file,$subinfo=array())   {
<a name="l01941"></a>01941                         <span class="comment">//  Set main array:</span>
<a name="l01942"></a>01942                 $hash = array();
<a name="l01943"></a>01943                 $hArray = array(
<a name="l01944"></a>01944                         'file' =&gt; $file,
<a name="l01945"></a>01945                 );
<a name="l01946"></a>01946 
<a name="l01947"></a>01947                         <span class="comment">// Set grouping hash:</span>
<a name="l01948"></a>01948                 $hash['phash_grouping'] = $this-&gt;md5inthash(serialize($hArray));
<a name="l01949"></a>01949 
<a name="l01950"></a>01950                         <span class="comment">// Add subinfo</span>
<a name="l01951"></a>01951                 $hArray['subinfo'] = $subinfo;
<a name="l01952"></a>01952                 $hash['phash'] = $this-&gt;md5inthash(serialize($hArray));
<a name="l01953"></a>01953 
<a name="l01954"></a>01954                 <span class="keywordflow">return</span> $hash;
<a name="l01955"></a>01955         }
<a name="l01956"></a>01956 
<a name="l01964"></a><a class="code" href="classtx__indexedsearch__indexer.html#7652caa06b5e1ed0aaa3ad337c852b9b">01964</a>         function md5inthash($str)       {
<a name="l01965"></a>01965                 <span class="keywordflow">return</span> hexdec(substr(md5($str),0,7));
<a name="l01966"></a>01966         }
<a name="l01967"></a>01967 
<a name="l01974"></a><a class="code" href="classtx__indexedsearch__indexer.html#93fbc95c58d077c932a0857eb6106c15">01974</a>         function makeCHash($paramArray) {
<a name="l01975"></a>01975                 $addQueryParams = <a class="code" href="classt3lib__div.html#7874ca6bd93d402c193542e864bb67f3">t3lib_div::implodeArrayForUrl</a>('', $paramArray);
<a name="l01976"></a>01976 
<a name="l01977"></a>01977                 $pA = <a class="code" href="classt3lib__div.html#9e0a1e00bf325fce2e9fbaea118ecdca">t3lib_div::cHashParams</a>($addQueryParams);
<a name="l01978"></a>01978 
<a name="l01979"></a>01979                 <span class="keywordflow">return</span> <a class="code" href="classt3lib__div.html#1e4206f70282a7cafb2c42c9fe6c1d5e">t3lib_div::shortMD5</a>(serialize($pA));
<a name="l01980"></a>01980         }
<a name="l01981"></a>01981 
<a name="l01982"></a>01982 
<a name="l01983"></a>01983 
<a name="l01984"></a>01984 
<a name="l01985"></a>01985 
<a name="l01986"></a>01986 
<a name="l01987"></a>01987 
<a name="l01988"></a>01988 
<a name="l01989"></a>01989 
<a name="l01990"></a>01990 
<a name="l01991"></a>01991 
<a name="l01992"></a>01992 
<a name="l01993"></a>01993         <span class="comment">/*********************************</span>
<a name="l01994"></a>01994 <span class="comment">         *</span>
<a name="l01995"></a>01995 <span class="comment">         * Internal logging functions</span>
<a name="l01996"></a>01996 <span class="comment">         *</span>
<a name="l01997"></a>01997 <span class="comment">         *********************************/</span>
<a name="l01998"></a>01998 
<a name="l02006"></a><a class="code" href="classtx__indexedsearch__indexer.html#06968115af4b21de48d2dbbffaad57d3">02006</a>         function log_push($msg,$key)    {
<a name="l02007"></a>02007                 <span class="keywordflow">if</span> (is_object($GLOBALS['TT']))          $GLOBALS['TT']-&gt;push($msg,$key);
<a name="l02008"></a>02008         }
<a name="l02009"></a>02009 
<a name="l02015"></a><a class="code" href="classtx__indexedsearch__indexer.html#695fa4d399aa441de19913cf696b968d">02015</a>         function log_pull()     {
<a name="l02016"></a>02016                 <span class="keywordflow">if</span> (is_object($GLOBALS['TT']))          $GLOBALS['TT']-&gt;pull();
<a name="l02017"></a>02017         }
<a name="l02018"></a>02018 
<a name="l02026"></a><a class="code" href="classtx__indexedsearch__indexer.html#c2fab59da8905a0e2c905cf18e53502f">02026</a>         function log_setTSlogMessage($msg, $errorNum=0) {
<a name="l02027"></a>02027                 <span class="keywordflow">if</span> (is_object($GLOBALS['TT']))          $GLOBALS['TT']-&gt;setTSlogMessage($msg,$errorNum);
<a name="l02028"></a>02028                 $this-&gt;internal_log[] = $msg;
<a name="l02029"></a>02029         }
<a name="l02030"></a>02030 
<a name="l02031"></a>02031 
<a name="l02032"></a>02032 
<a name="l02033"></a>02033 
<a name="l02034"></a>02034 
<a name="l02035"></a>02035 
<a name="l02036"></a>02036 
<a name="l02037"></a>02037 
<a name="l02038"></a>02038         <span class="comment">/**************************</span>
<a name="l02039"></a>02039 <span class="comment">         *</span>
<a name="l02040"></a>02040 <span class="comment">         * tslib_fe hooks:</span>
<a name="l02041"></a>02041 <span class="comment">         *</span>
<a name="l02042"></a>02042 <span class="comment">         **************************/</span>
<a name="l02043"></a>02043 
<a name="l02051"></a><a class="code" href="classtx__indexedsearch__indexer.html#2c0c30efc825742efa9527a9e7d7215d">02051</a>         function fe_headerNoCache(&amp;$params, $ref)       {
<a name="l02052"></a>02052 
<a name="l02053"></a>02053                         <span class="comment">// Requirements are that the crawler is loaded, a crawler session is running and re-indexing requested as processing instruction:</span>
<a name="l02054"></a>02054                 <span class="keywordflow">if</span> (<a class="code" href="classt3lib__extMgm.html#297116a3b1e17045ff193f170b8c4a29">t3lib_extMgm::isLoaded</a>('crawler')
<a name="l02055"></a>02055                                 &amp;&amp; $params['pObj']-&gt;applicationData['tx_crawler']['running']
<a name="l02056"></a>02056                                 &amp;&amp; in_array('tx_indexedsearch_reindex', $params['pObj']-&gt;applicationData['tx_crawler']['parameters']['procInstructions']))      {
<a name="l02057"></a>02057 
<a name="l02058"></a>02058                                 <span class="comment">// Setting simple log entry:</span>
<a name="l02059"></a>02059                         $params['pObj']-&gt;applicationData['tx_crawler']['log'][] = 'RE_CACHE (indexed), old status: '.$params['disableAcquireCacheData'];
<a name="l02060"></a>02060 
<a name="l02061"></a>02061                                 <span class="comment">// Disables a look-up for cached page data - thus resulting in re-generation of the page even if cached.</span>
<a name="l02062"></a>02062                         $params['disableAcquireCacheData'] = TRUE;
<a name="l02063"></a>02063                 }
<a name="l02064"></a>02064         }
<a name="l02065"></a>02065 }
<a name="l02066"></a>02066 
<a name="l02067"></a>02067 
<a name="l02068"></a>02068 <span class="keywordflow">if</span> (defined('TYPO3_MODE') &amp;&amp; $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.indexer.php'])    {
<a name="l02069"></a>02069         include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/<span class="keyword">class</span>.indexer.php']);
<a name="l02070"></a>02070 }
<a name="l02071"></a>02071 ?&gt;
</pre></div><?php
  // Page footer. The helper file is also pulled in with include_once at the
  // top of this page, so repeating the include_once here does not load it a
  // second time; it only guards against the footer being reached without it.
  include_once('../doc-typo3-funcs.php');

  // get_footer() is not defined in this file; presumably it is provided by
  // the helper include above and closes the page layout opened by get_header().
  get_footer();
?>