Documentation TYPO3 par Ameos

class.tx_indexedsearch_modfunc1.php

00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *
00017 *  This script is distributed in the hope that it will be useful,
00018 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00019 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00020 *  GNU General Public License for more details.
00021 *
00022 *  This copyright notice MUST APPEAR in all copies of the script!
00023 ***************************************************************/
00088 require_once(PATH_t3lib.'class.t3lib_pagetree.php');
00089 require_once(PATH_t3lib.'class.t3lib_extobjbase.php');
00090 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');
00091 
00092 
00093         // ... all for the rootline!
00094 require_once (PATH_t3lib."class.t3lib_page.php");
00095 require_once (PATH_t3lib."class.t3lib_tstemplate.php");
00096 require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");
00097 
00098         // Keywords mgm:
00099 require_once (PATH_t3lib."class.t3lib_tcemain.php");
00100 
00101 
00102 
00110 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {
00111 
00112                 // Internal, dynamic:
00113         var $allPhashListed = array();          // phash values accumulations for link to clear all
00114         var $external_parsers = array();        // External content parsers - objects set here with file extensions as keys.
00115         var $iconFileNameCache = array();       // File extensions - icon map/cache.
00116         var $indexerObj;                                        // Indexer object
00117 
00118 
00124     function modMenu()    {
00125         global $LANG;
00126 
00127                 return array (
00128                         'depth' => array(
00129                                 0 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_0'),
00130                                 1 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_1'),
00131                                 2 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_2'),
00132                                 3 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_3'),
00133                         ),
00134                         'type' => array(
00135                                 0 => 'Overview',
00136                                 1 => 'Technical Details',
00137                                 2 => 'Words and content',
00138 //                              3 => 'Indexing'
00139                         )
00140                 );
00141     }
00142 
00148     function main()    {
00149             // Initializes the module. Done in this function because we may need to re-initialize if data is submitted!
00150         global $SOBE,$BE_USER,$LANG,$BACK_PATH,$TCA_DESCR,$TCA,$CLIENT,$TYPO3_CONF_VARS;
00151 
00152                         // Return if no page id:
00153                 if ($this->pObj->id<=0)         return;
00154 
00155                         // Initialize max-list items
00156                 $this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;
00157 
00158                         // Processing deletion of phash rows:
00159                 if (t3lib_div::_GP('deletePhash'))      {
00160                         $this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash'));
00161                 }
00162 
00163                         // Processing stop-words:
00164                 if (t3lib_div::_POST('_stopwords'))     {
00165                         $this->processStopWords(t3lib_div::_POST('stopWord'));
00166                 }
00167 
00168                         // Processing stop-words:
00169                 if (t3lib_div::_POST('_pageKeywords'))  {
00170                         $this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
00171                 }
00172 
00173                         // Initialize external document parsers:
00174                         // Example configuration, see ext_localconf.php of this file!
00175                 if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))        {
00176                         foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef)    {
00177                                 $this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);
00178 
00179                                         // Init parser and if it returns false, unset its entry again:
00180                                 if (!$this->external_parsers[$extension]->softInit($extension)) {
00181                                         unset($this->external_parsers[$extension]);
00182                                 }
00183                         }
00184                 }
00185 
00186                         // Initialize indexer if we need it (metaphone display does...)
00187                 $this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
00188 
00189                         // Set CSS styles specific for this document:
00190                 $this->pObj->content = str_replace('/*###POSTCSSMARKER###*/','
00191                         TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; }
00192                 ',$this->pObj->content);
00193 
00194 
00195                         // Check if details for a phash record should be shown:
00196                 if (t3lib_div::_GET('phash'))   {
00197 
00198                                 // Show title / function menu:
00199                         $theOutput.=$this->pObj->doc->spacer(5);
00200                         $theOutput.=$this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);
00201                 } elseif (t3lib_div::_GET('wid'))       {
00202 
00203                                 // Show title / function menu:
00204                         $theOutput.=$this->pObj->doc->spacer(5);
00205                         $theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
00206                 } elseif (t3lib_div::_GET('metaphone')) {
00207 
00208                                 // Show title / function menu:
00209                         $theOutput.=$this->pObj->doc->spacer(5);
00210                         $theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
00211                 } elseif (t3lib_div::_GET('reindex'))   {
00212 
00213                                 // Show title / function menu:
00214                         $theOutput.=$this->pObj->doc->spacer(5);
00215                         $theOutput.=$this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);
00216                 } else {        // Detail listings:
00217                                 // Depth function menu:
00218                         $h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');
00219                         if (t3lib_div::inList('0,1,2',$this->pObj->MOD_SETTINGS['type']))       {
00220                                 $h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');
00221 
00222                                         // Show title / function menu:
00223                                 $theOutput.=$this->pObj->doc->spacer(5);
00224                                 $theOutput.=$this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);
00225 
00226                                 $theOutput.=$this->drawTableOfIndexedPages();
00227                         } else {
00228 
00229                                         // Show title / function menu:
00230                                 $theOutput.= $this->pObj->doc->spacer(5);
00231                                 $theOutput.= $this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);
00232 
00233                                 $theOutput.= $this->extraIndexing();
00234                         }
00235                 }
00236 
00237         return $theOutput;
00238     }
00239 
00240 
00241 
00242 
00243 
00244 
00245 
00246 
00247 
00248 
00249 
00250         /*******************************
00251          *
00252          * Drawing table of indexed pages
00253          *
00254          ******************************/
00255 
00261         function drawTableOfIndexedPages()      {
00262                 global $BACK_PATH;
00263 
00264                         // Drawing tree:
00265                 $tree = t3lib_div::makeInstance('t3lib_pageTree');
00266                 $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
00267                 $tree->init('AND '.$perms_clause);
00268 
00269                 $HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
00270                 $tree->tree[] = Array(
00271                         'row' => $this->pObj->pageinfo,
00272                         'HTML' => $HTML
00273                 );
00274 
00275                 if ($this->pObj->MOD_SETTINGS['depth']) {
00276                         $tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
00277                 }
00278 
00279                         // Traverse page tree:
00280                 $code = '';
00281                 foreach($tree->tree as $data)   {
00282                         $code.= $this->indexed_info(
00283                                                 $data['row'],
00284                                                 $data['HTML'].
00285                                                         $this->showPageDetails(t3lib_div::fixed_lgd($data['row']['title'], 20),$data['row']['uid'])
00286                                         );
00287                 }
00288 
00289                 if ($code)      {
00290                         $code = '<br/><br/>
00291                                         <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
00292                                                 $this->printPhashRowHeader().
00293                                                 $code.
00294                                         '</table>';
00295 
00296                                 // Create section to output:
00297                         $theOutput.=$this->pObj->doc->section('',$code,0,1);
00298                 } else {
00299                         $theOutput.=$this->pObj->doc->section('','<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>',0,1);
00300                 }
00301 
00302                 return  $theOutput;
00303         }
00304 
00312         function indexed_info($data, $firstColContent)  {
00313 
00314                         // Query:
00315                 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
00316                                         'ISEC.*, IP.*, count(*) AS count_val',
00317                                         'index_phash IP, index_section ISEC',
00318                                         'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']),
00319                                         'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid',
00320                                         'IP.item_type, IP.tstamp',
00321                                         ($this->maxListPerPage+1)
00322                                 );
00323 
00324                         // Initialize variables:
00325                 $rowCount = 0;
00326                 $lines = array();               // Collecting HTML rows here.
00327                 $phashAcc = array();    // Collecting phash values (to remove local indexing for)
00328                 $phashAcc[] = 0;
00329 
00330                         // Traverse the result set of phash rows selected:
00331                 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))       {
00332                         if ($rowCount == $this->maxListPerPage) {
00333                                 $rowCount++;    // Increase to the extra warning row will appear as well.
00334                                 break;
00335                         }
00336 
00337                                 // Adds a display row:
00338                         $lines[$row['phash_grouping']][] = $this->printPhashRow(
00339                                                 $row,
00340                                                 isset($lines[$row['phash_grouping']]),
00341                                                 $this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
00342                                         );
00343                         $rowCount++;
00344                         $phashAcc[] = $row['phash'];
00345                         $this->allPhashListed[] = $row['phash'];        // For removing all shown phash rows.
00346                 }
00347 
00348                         // Compile rows into the table:
00349                 $out = '';
00350                 if (count($lines))      {
00351                         $firstColContent = '<td rowspan="'.$rowCount.'">'.$firstColContent.'</td>';
00352                         foreach($lines as $rowSet)      {
00353                                 foreach($rowSet as $rows)       {
00354                                         $out.='
00355                                                 <tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';
00356 
00357                                         $firstColContent = '';
00358                                 }
00359                         }
00360 
00361                         if ($rowCount > $this->maxListPerPage)  {       // Now checking greater than, because we increased $rowCount before...
00362                                 $out.='
00363                                 <tr class="bgColor-20">
00364                                         <td>&nbsp;</td>
00365                                         <td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td>
00366                                 </tr>';
00367                         }
00368                 } else {
00369                         $out.='
00370                                 <tr class="bgColor-20">
00371                                         <td>'.$firstColContent.'</td>
00372                                         <td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td>
00373                                 </tr>';
00374                 }
00375 
00376                         // Checking for phash-rows which are NOT joined with the section table:
00377                 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('IP.*', 'index_phash IP', 'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',$phashAcc).')');
00378                 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))       {
00379                         $out.='
00380                                 <tr class="typo3-red">
00381                                         <td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td>
00382                                 </tr>';
00383                         $this->allPhashListed[] = $row['phash'];
00384                 }
00385 
00386                 return $out;
00387         }
00388 
00398         function printPhashRow($row,$grouping=0,$extraGrListRows)       {
00399                 $lines = array();
00400 
00401                         // Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong!
00402                 $titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : '');
00403 
00404                 if ($row['item_type'])  {
00405                         $arr = unserialize($row['cHashParams']);
00406                         $page = $arr['key'] ? ' ['.$arr['key'].']' : '';
00407                 } else $page = '';
00408                 $elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
00409                 $cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');
00410 
00411                 switch($this->pObj->MOD_SETTINGS['type'])       {
00412                         case 1:         // Technical details:
00413                                         // Display icon:
00414                                 if (!$grouping) {
00415                                         $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
00416                                 } else {
00417                                         $lines[] = '<td>&nbsp;</td>';
00418                                 }
00419 
00420                                         // Title displayed:
00421                                 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
00422 
00423                                         // Remove-indexing-link:
00424                                 $lines[] = '<td>'.$cmdLinks.'</td>';
00425 
00426                                         // Various data:
00427                                 $lines[] = '<td>'.$row['phash'].'</td>';
00428                                 $lines[] = '<td>'.$row['contentHash'].'</td>';
00429 
00430                                 if ($row['item_type']==='0')    {
00431                                         $lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : '&nbsp;').'</td>';
00432                                         $lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : '&nbsp;').'</td>';
00433                                         $lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : '&nbsp;').'</td>';
00434                                         $lines[] = '<td>'.($row['data_page_mp'] ? $row['data_page_mp'] : '&nbsp;').'</td>';
00435                                 } else {
00436                                         $lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
00437                                 }
00438                                 $lines[] = '<td>'.$row['gr_list'].$this->printExtraGrListRows($extraGrListRows).'</td>';
00439                                 $lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
00440                                 $lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : '&nbsp;').'</td>';
00441                                 $lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : '&nbsp;').'</td>';
00442                                 $lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'] : '&nbsp;').'</td>';
00443                                 $lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : '&nbsp;').'</td>';
00444 
00445 
00446 
00447                                         // cHash parameters:
00448                                 $arr = unserialize($row['cHashParams']);
00449                                 if (is_array($arr))             {
00450                                         $theCHash = $arr['cHash'];
00451                                         unset($arr['cHash']);
00452                                 }
00453 
00454                                 if ($row['item_type'])  {       // pdf...
00455                                         $lines[] = '<td>'.($arr['key'] ? 'Page '.$arr['key'] : '').'&nbsp;</td>';
00456                                 } elseif ($row['item_type']==0) {
00457                                         $lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$arr)).'&nbsp;</td>';
00458                                 } else {
00459                                         $lines[] = '<td class="bgColor">&nbsp;</td>';
00460                                 }
00461 
00462                                 $lines[] = '<td>'.$theCHash.'</td>';
00463                         break;
00464                         case 2:         // Words and content:
00465                                         // Display icon:
00466                                 if (!$grouping) {
00467                                         $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
00468                                 } else {
00469                                         $lines[] = '<td>&nbsp;</td>';
00470                                 }
00471 
00472                                         // Title displayed:
00473                                 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
00474 
00475                                         // Remove-indexing-link:
00476                                 $lines[] = '<td>'.$cmdLinks.'</td>';
00477 
00478                                         // Query:
00479                                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00480                                                         '*',
00481                                                         'index_fulltext',
00482                                                         'phash = '.intval($row['phash'])
00483                                                 );
00484                                 $lines[] = '<td style="white-space: normal;">'.
00485                                                         htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($ftrows[0]['fulltextdata']),3000)).
00486                                                         '<hr/><em>Size: '.strlen($ftrows[0]['fulltextdata']).'</em>'.
00487                                                         '</td>';
00488 
00489                                         // Query:
00490                                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00491                                                         'index_words.baseword, index_rel.*',
00492                                                         'index_rel, index_words',
00493                                                         'index_rel.phash = '.intval($row['phash']).
00494                                                                 ' AND index_words.wid = index_rel.wid',
00495                                                         '',
00496                                                         '',
00497                                                         '',
00498                                                         'baseword'
00499                                                 );
00500 
00501                                 $wordList = '';
00502                                 if (is_array($ftrows))  {
00503                                         $indexed_words = array_keys($ftrows);
00504                                         sort($indexed_words);
00505                                         $wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
00506                                         $wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
00507                                 }
00508 
00509                                 $lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
00510                         break;
00511                         default:        // Overview
00512                                         // Display icon:
00513                                 if (!$grouping) {
00514                                         $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
00515                                 } else {
00516                                         $lines[] = '<td>&nbsp;</td>';
00517                                 }
00518 
00519                                         // Title displayed:
00520                                 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';
00521 
00522                                         // Remove-indexing-link:
00523                                 $lines[] = '<td>'.$cmdLinks.'</td>';
00524 
00525                                 $lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
00526                                 $lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
00527                                 $lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
00528                         break;
00529                 }
00530 
00531                 return $lines;
00532         }
00533 
00539         function printPhashRowHeader()  {
00540                 $lines = array();
00541 
00542                 switch($this->pObj->MOD_SETTINGS['type'])       {
00543                         case 1:
00544                                 $lines[] = '<td>&nbsp;</td>';
00545                                 $lines[] = '<td>&nbsp;</td>';
00546                                 $lines[] = '<td>Title</td>';
00547                                 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>';
00548 
00549                                 $lines[] = '<td>pHash</td>';
00550                                 $lines[] = '<td>cHash</td>';
00551                                 $lines[] = '<td>&amp;id</td>';
00552                                 $lines[] = '<td>&amp;type</td>';
00553                                 $lines[] = '<td>&amp;L</td>';
00554                                 $lines[] = '<td>&amp;MP</td>';
00555                                 $lines[] = '<td>grlist</td>';
00556                                 $lines[] = '<td>Rootline</td>';
00557                                 $lines[] = '<td>page_id</td>';
00558                                 $lines[] = '<td>phash_t3</td>';
00559                                 $lines[] = '<td>CfgUid</td>';
00560                                 $lines[] = '<td>RecUid</td>';
00561                                 $lines[] = '<td>GET-parameters</td>';
00562                                 $lines[] = '<td>&amp;cHash</td>';
00563                         break;
00564                         case 2:
00565                                 $lines[] = '<td>&nbsp;</td>';
00566                                 $lines[] = '<td>&nbsp;</td>';
00567                                 $lines[] = '<td>Title</td>';
00568                                 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>';
00569                                 $lines[] = '<td>Content<br/>
00570                                                         <img src="clear.gif" width="300" height="1" alt="" /></td>';
00571                                 $lines[] = '<td>Words<br/>
00572                                                         <img src="clear.gif" width="300" height="1" alt="" /></td>';
00573                         break;
00574                         default:
00575                                 $lines[] = '<td>&nbsp;</td>';
00576                                 $lines[] = '<td>&nbsp;</td>';
00577                                 $lines[] = '<td>Title</td>';
00578                                 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>';
00579                                 $lines[] = '<td>Description</td>';
00580                                 $lines[] = '<td>Size</td>';
00581                                 $lines[] = '<td>Indexed:</td>';
00582                         break;
00583                 }
00584 
00585                 $out = '<tr class="tableheader bgColor5">'.implode('',$lines).'</tr>';
00586                 return $out;
00587         }
00588 
00594         function returnNumberOfColumns()        {
00595                 switch($this->pObj->MOD_SETTINGS['type'])       {
00596                         case 1:
00597                                 return 18;
00598                         break;
00599                         case 2:
00600                                 return 6;
00601                         break;
00602                         default:
00603                                 return 7;
00604                         break;
00605                 }
00606         }
00607 
00608 
00609 
00610 
00611 
00612 
00613 
00614 
00615 
00616 
00617 
00618         /*******************************
00619          *
00620          * Details display, phash row
00621          *
00622          *******************************/
00623 
00630         function showDetailsForPhash($phash)    {
00631 
00632                 $content = '';
00633 
00634                         // Selects the result row:
00635                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00636                                         '*',
00637                                         'index_phash',
00638                                         'phash = '.intval($phash)
00639                                 );
00640                 $phashRecord = $ftrows[0];
00641 
00642                         // If found, display:
00643                 if (is_array($phashRecord))     {
00644                         $content.= '<h4>phash row content:</h4>'.
00645                                                 $this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));
00646 
00647                                 // Getting debug information if any:
00648                         $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00649                                                 '*',
00650                                                 'index_debug',
00651                                                 'phash = '.intval($phash)
00652                                         );
00653                         if (is_array($ftrows))  {
00654                                 $debugInfo = unserialize($ftrows[0]['debuginfo']);
00655                                 $lexer = $debugInfo['lexer'];
00656                                 unset($debugInfo['lexer']);
00657 
00658                                 $content.= '<h3>Debug information:</h3>'.
00659                                                 $this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));
00660 
00661                                 $content.= '<h4>Debug information / lexer splitting:</h4>'.
00662                                                 '<hr/><b>'.
00663                                                 $this->utf8_to_currentCharset($lexer).
00664                                                 '</b><hr/>';
00665                         }
00666 
00667 
00668 
00669                         $content.='<h3>Word statistics</h3>';
00670 
00671                                 // Finding all words for this phash:
00672                         $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00673                                                 'index_words.*, index_rel.*',
00674                                                 'index_rel, index_words',
00675                                                 'index_rel.phash = '.intval($phash).
00676                                                         ' AND index_words.wid = index_rel.wid',
00677                                                 '',
00678                                                 'index_words.baseword',
00679                                                 ''
00680                                         );
00681                         $pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
00682                         $showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
00683                         $content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);
00684 
00685                                 // Group metaphone hash:
00686                         $metaphone = array();
00687                         foreach($ftrows as $row)        {
00688                                 $metaphone[$row['metaphone']][] = $row['baseword'];
00689                         }
00690                         $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');
00691 
00692                                 // Finding top-20 on frequency for this phash:
00693                         $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00694                                                 'index_words.baseword, index_words.metaphone, index_rel.*',
00695                                                 'index_rel, index_words',
00696                                                 'index_rel.phash = '.intval($phash).
00697                                                         ' AND index_words.wid = index_rel.wid
00698                                                          AND index_words.is_stopword=0',
00699                                                 '',
00700                                                 'index_rel.freq DESC',
00701                                                 '20'
00702                                         );
00703                         $content.= $this->listWords($ftrows, 'Top-20 words by frequency:', 2);
00704 
00705                                 // Finding top-20 on count for this phash:
00706                         $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00707                                                 'index_words.baseword, index_words.metaphone, index_rel.*',
00708                                                 'index_rel, index_words',
00709                                                 'index_rel.phash = '.intval($phash).
00710                                                         ' AND index_words.wid = index_rel.wid
00711                                                          AND index_words.is_stopword=0',
00712                                                 '',
00713                                                 'index_rel.count DESC',
00714                                                 '20'
00715                                         );
00716                         $content.= $this->listWords($ftrows, 'Top-20 words by count:', 2);
00717 
00718 
00719                         $content.='<h3>Section records for this phash</h3>';
00720 
00721                                 // Finding sections for this record:
00722                         $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00723                                                 '*',
00724                                                 'index_section',
00725                                                 'index_section.phash = '.intval($phash),
00726                                                 '',
00727                                                 '',
00728                                                 ''
00729                                         );
00730                         $content.= t3lib_div::view_array($ftrows);
00731 
00732                                 // Add go-back link:
00733                         $content = $this->linkList().$content.$this->linkList();
00734 
00735                 } else $content.= 'Error: No phash row found';
00736 
00737                 return $content;
00738         }
00739 
00749         function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')     {
00750 
00751                         // Prepare keywords:
00752                 $keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
00753 
00754                         // Render list:
00755                 $trows = '';
00756                 $trows.= '
00757                         <tr class="tableheader bgColor5">
00758                                 '.($stopWordBoxes ? '<td>'.htmlspecialchars('Stopword:').'</td>' : '').'
00759                                 <td>'.htmlspecialchars('Word:').'</td>
00760                                 <td>'.htmlspecialchars('Count:').'</td>
00761                                 <td>'.htmlspecialchars('First:').'</td>
00762                                 <td>'.htmlspecialchars('Frequency:').'</td>
00763                                 <td>'.htmlspecialchars('Flags:').'</td>
00764                                 '.(is_array($keywords) ? '<td>'.htmlspecialchars('Page keyword:').'</td>' : '').'
00765                         </tr>
00766                 ';
00767                 foreach($ftrows as $row)        {
00768                         $hiddenField = $stopWordBoxes!=2 ? '<input type="hidden" name="stopWord['.$row['wid'].']" value="0" />' : '';
00769                         $trows.= '
00770                                 <tr class="'.($row['is_stopword'] ? 'bgColor' : 'bgColor4').'">
00771                                         '.($stopWordBoxes ? '<td align="center"'.($row['is_stopword'] ? ' style="background-color:red;"' : '').'>'.$hiddenField.'<input type="checkbox" name="stopWord['.$row['wid'].']" value="1"'.($row['is_stopword']?'checked="checked"':'').' /></td>' : '').'
00772                                         <td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>
00773                                         <td>'.htmlspecialchars($row['count']).'</td>
00774                                         <td>'.htmlspecialchars($row['first']).'</td>
00775                                         <td>'.htmlspecialchars($row['freq']).'</td>
00776                                         <td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td>
00777                                         '.(is_array($keywords) ? '<td align="center"'.(isset($keywords[$row['baseword']]) ? ' class="bgColor2"' : '').'><input type="hidden" name="pageKeyword['.$row['baseword'].']" value="0" /><input type="checkbox" name="pageKeyword['.$row['baseword'].']" value="1"'.(isset($keywords[$row['baseword']])?'checked="checked"':'').' /></td>' : '').'
00778                                 </tr>
00779                         ';
00780                 }
00781 
00782                 return '<h4>'.htmlspecialchars($header).'</h4>'.
00783                                         '
00784                                         <table border="0" cellspacing="1" cellpadding="2" class="c-list">
00785                                         '.$trows.'
00786                                         </table>'.
00787                                         ($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : '').
00788                                         (is_array($keywords) ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.$page['uid'].'" />'.
00789                                                                                 '<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>' : '');
00790         }
00791 
00799         function listMetaphoneStat($ftrows,$header)     {
00800 
00801                 $trows = '';
00802                 $trows.= '
00803                         <tr class="tableheader bgColor5">
00804                                 <td>'.htmlspecialchars('Metaphone:').'</td>
00805                                 <td>'.htmlspecialchars('Hash:').'</td>
00806                                 <td>'.htmlspecialchars('Count:').'</td>
00807                                 <td>'.htmlspecialchars('Words:').'</td>
00808                         </tr>
00809                 ';
00810                 foreach($ftrows as $metaphone => $words)        {
00811                         if (count($words)>1)    {
00812                                 $trows.= '
00813                                         <tr class="bgColor4">
00814                                                 <td>'.$this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone).'</td>
00815                                                 <td>'.htmlspecialchars($metaphone).'</td>
00816                                                 <td>'.htmlspecialchars(count($words)).'</td>
00817                                                 <td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset(implode(', ',$words))).'</td>
00818                                         </tr>
00819                                 ';
00820                         }
00821                 }
00822 
00823                 return '<h4>'.htmlspecialchars($header).'</h4>'.
00824                                         '<table border="0" cellspacing="1" cellpadding="2" class="c-list">
00825                                         '.$trows.'
00826                                         </table>';
00827         }
00828 
00836         function linkWordDetails($string,$wid)  {
00837                 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('wid'=>$wid,'phash'=>''))).'">'.$string.'</a>';
00838         }
00839 
00840 
00848         function linkMetaPhoneDetails($string,$metaphone)       {
00849                 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('metaphone'=>$metaphone,'wid'=>'','phash'=>''))).'">'.$string.'</a>';
00850         }
00851 
00858         function flagsMsg($flags)       {
00859                 if ($flags > 0) {
00860                         return
00861                                 ($flags & 128 ? '<title>' : '').        // pow(2,7)
00862                                 ($flags & 64 ? '<meta/keywords>' : ''). // pow(2,6)
00863                                 ($flags & 32 ? '<meta/description>' : '').      // pow(2,5)
00864                                 ' ('.$flags.')';
00865                 }
00866         }
00867 
00868 
00869 
00870 
00871 
00872 
00873 
00874 
00875 
00876 
00877         /*******************************
00878          *
00879          * Details display, words / metaphone
00880          *
00881          *******************************/
00882 
00889         function showDetailsForWord($wid)       {
00890 
00891                         // Select references to this word
00892                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00893                                         'index_phash.*, index_section.*, index_rel.*',
00894                                         'index_rel, index_section, index_phash',
00895                                         'index_rel.wid = '.intval($wid).
00896                                                 ' AND index_rel.phash = index_section.phash'.
00897                                                 ' AND index_section.phash = index_phash.phash',
00898                                         '',
00899                                         'index_rel.freq DESC',
00900                                         ''
00901                                 );
00902 
00903                         // Headers:
00904                 $content.='
00905                         <tr class="tableheader bgColor5">
00906                                 <td>phash</td>
00907                                 <td>page_id</td>
00908                                 <td>data_filename</td>
00909                                 <td>count</td>
00910                                 <td>first</td>
00911                                 <td>freq</td>
00912                                 <td>flags</td>
00913                         </tr>';
00914 
00915                 if (is_array($ftrows))  {
00916                         foreach($ftrows as $wDat)       {
00917                                 $content.='
00918                                         <tr class="bgColor4">
00919                                                 <td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>
00920                                                 <td>'.htmlspecialchars($wDat['page_id']).'</td>
00921                                                 <td>'.htmlspecialchars($wDat['data_filename']).'</td>
00922                                                 <td>'.htmlspecialchars($wDat['count']).'</td>
00923                                                 <td>'.htmlspecialchars($wDat['first']).'</td>
00924                                                 <td>'.htmlspecialchars($wDat['freq']).'</td>
00925                                                 <td>'.htmlspecialchars($wDat['flags']).'</td>
00926                                         </tr>';
00927                         }
00928                 }
00929 
00930                         // Compile table:
00931                 $content = '
00932                         <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
00933                                 $content.'
00934                         </table>';
00935 
00936                         // Add go-back link:
00937                 $content = $content.$this->linkList();
00938 
00939                 return $content;
00940         }
00941 
00948         function showDetailsForMetaphone($metaphone)    {
00949 
00950                         // Finding top-20 on frequency for this phash:
00951                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
00952                                         'index_words.*',
00953                                         'index_words',
00954                                         'index_words.metaphone = '.intval($metaphone),
00955                                         '',
00956                                         'index_words.baseword',
00957                                         ''
00958                                 );
00959 
00960                 if (count($ftrows))     {
00961                         $content.='<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>';
00962 
00963                         $content.='
00964                                 <tr class="tableheader bgColor5">
00965                                         <td>Word</td>
00966                                         <td>Is stopword?</td>
00967                                 </tr>';
00968 
00969                         if (is_array($ftrows))  {
00970                                 foreach($ftrows as $wDat)       {
00971                                         $content.='
00972                                                 <tr class="bgColor4">
00973                                                         <td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>
00974                                                         <td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>
00975                                                 </tr>';
00976                                 }
00977                         }
00978 
00979                         $content = '
00980                                 <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
00981                                         $content.'
00982                                 </table>';
00983 
00984                         if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone)   {
00985                                 $content.='ERROR: Metaphone string and hash did not match for some reason!?';
00986                         }
00987 
00988                                 // Add go-back link:
00989                         $content = $content.$this->linkList();
00990                 }
00991 
00992                 return $content;
00993         }
00994 
00995 
00996 
00997 
00998 
00999 
01000 
01001 
01002 
01003 
01004 
01005 
01006         /*******************************
01007          *
01008          * Helper functions
01009          *
01010          *******************************/
01011 
01019         function printRemoveIndexed($phash,$alt)        {
01020                 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash))).'">'.
01021                                 '<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />'.
01022                                 '</a>';
01023         }
01024 
01032         function printReindex($resultRow,$alt)  {
01033                 if ($resultRow['item_type'] && $resultRow['item_type']!=='0')   {
01034                         return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('reindex'=>$resultRow['phash'],'reindex_id'=>$resultRow['page_id']))).'">'.
01035                                         '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />'.
01036                                         '</a>';
01037                 }
01038         }
01039 
01047         function linkDetails($string,$phash)    {
01048                 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash))).'">'.$string.'</a>';
01049         }
01050 
01056         function linkList()     {
01057                 return '<br/><a href="index.php?id='.$this->pObj->id.'">Back to list.</a><br/>';
01058         }
01059 
01067         function showPageDetails($string,$id)   {
01068                 return '<a href="'.htmlspecialchars('index.php?id='.$id.'&SET[depth]=0&SET[type]=1').'">'.$string.'</a>';
01069         }
01070 
01077         function printExtraGrListRows($extraGrListRows) {
01078                 if (count($extraGrListRows))    {
01079                         reset($extraGrListRows);
01080                         $lines=array();
01081                         while(list(,$r)=each($extraGrListRows)) {
01082                                 $lines[] = $r['gr_list'];
01083                         }
01084                         return "<br/>".$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines));
01085                 }
01086         }
01087 
01094         function printRootlineInfo($row)        {
01095                 $uidCollection = array();
01096 
01097                 if ($row['rl0'])        {
01098                         $uidCollection[0] = $row['rl0'];
01099                         if ($row['rl1'])        {
01100                                 $uidCollection[1] = $row['rl1'];
01101                                 if ($row['rl2'])        {
01102                                         $uidCollection[2] = $row['rl2'];
01103 
01104                                                 // Additional levels:
01105                                         if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields']))    {
01106                                                 foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel)  {
01107                                                         if ($row[$fieldName])   {
01108                                                                 $uidCollection[$rootLineLevel] = $row[$fieldName];
01109                                                         }
01110                                                 }
01111                                         }
01112                                 }
01113                         }
01114                 }
01115 
01116                         // Return root line.
01117                 ksort($uidCollection);
01118                 return implode('/',$uidCollection);
01119         }
01120 
01128         function makeItemTypeIcon($it,$alt='')  {
01129                 if (!isset($this->iconFileNameCache[$it]))      {
01130                         if ($it==='0')  {
01131                                 $icon = 'EXT:indexed_search/pi/res/pages.gif';
01132                         } elseif ($this->external_parsers[$it]) {
01133                                 $icon = $this->external_parsers[$it]->getIcon($it);
01134                         }
01135 
01136                         $fullPath = t3lib_div::getFileAbsFileName($icon);
01137 
01138                         if ($fullPath)  {
01139                                 $info = @getimagesize($fullPath);
01140                                 $iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site));
01141                                 $this->iconFileNameCache[$it] = is_array($info) ? '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />' : '';
01142                         }
01143                 }
01144                 return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]);
01145         }
01146 
01153         function utf8_to_currentCharset($string)        {
01154                 global $LANG;
01155                 if ($LANG->charSet != 'utf-8')  {
01156                         $string = $LANG->csConvObj->utf8_decode($string, $LANG->charSet, TRUE);
01157                 }
01158                 return $string;
01159         }
01160 
01161 
01162 
01163 
01164 
01165 
01166 
01167 
01168 
01169 
01170 
01171 
01172         /********************************
01173          *
01174          * Reindexing
01175          *
01176          *******************************/
01177 
01185         function reindexPhash($phash, $pageId)  {
01186 
01187                         // Query:
01188                 list($resultRow) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
01189                                         'ISEC.*, IP.*',
01190                                         'index_phash IP, index_section ISEC',
01191                                         'IP.phash = ISEC.phash
01192                                                 AND IP.phash = '.intval($phash).'
01193                                                 AND ISEC.page_id = '.intval($pageId)
01194                                 );
01195 
01196                 $content = '';
01197                 if (is_array($resultRow))       {
01198                         if ($resultRow['item_type'] && $resultRow['item_type']!=='0')   {
01199 
01200                                         // (Re)-Indexing file on page.
01201                                 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
01202                                 $indexerObj->backend_initIndexer($pageId, 0, 0, '', $this->getUidRootLineForClosestTemplate($pageId));
01203 
01204                                         // URL or local file:
01205                                 if ($resultRow['externalUrl'])  {
01206                                         $indexerObj->indexExternalUrl($resultRow['data_filename']);
01207                                 } else {
01208                                         $indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE);
01209                                 }
01210 
01211                                 if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash'])        {
01212                                         $content.= 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!';
01213                                 }
01214 
01215                                 $content.='<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>';
01216                                 $content.=t3lib_div::view_array($indexerObj->internal_log);
01217 
01218                                 $content.='<h4>Hash-array, page:</h4>';
01219                                 $content.=t3lib_div::view_array($indexerObj->hash);
01220 
01221                                 $content.='<h4>Hash-array, file:</h4>';
01222                                 $content.=t3lib_div::view_array($indexerObj->file_phash_arr);
01223                         }
01224                 }
01225 
01226                         // Link back to list.
01227                 $content.= $this->linkList();
01228 
01229                 return $content;
01230         }
01231 
01239         function getUidRootLineForClosestTemplate($id)  {
01240                 $tmpl = t3lib_div::makeInstance("t3lib_tsparser_ext");  // Defined global here!
01241                 $tmpl->tt_track = 0;    // Do not log time-performance information
01242                 $tmpl->init();
01243 
01244                                 // Gets the rootLine
01245                 $sys_page = t3lib_div::makeInstance("t3lib_pageSelect");
01246                 $rootLine = $sys_page->getRootLine($id);
01247                 $tmpl->runThroughTemplates($rootLine,0);        // This generates the constants/config + hierarchy info for the template.
01248 
01249                         // Root line uids
01250                 $rootline_uids = array();
01251                 foreach($tmpl->rootLine as $rlkey => $rldat)    {
01252                         $rootline_uids[$rlkey] = $rldat['uid'];
01253                 }
01254 
01255                 return $rootline_uids;
01256         }
01257 
01258 
01259 
01260 
01261 
01262 
01263 
01264 
01265 
01266 
01267 
01268 
01269         /********************************
01270          *
01271          * Indexing of configurations
01272          *
01273          *******************************/
01274 
01280         function extraIndexing()        {
01281 
01282                         // Select index configurations on this page
01283                 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
01284                                         '*',
01285                                         'index_config',
01286                                         'pid = '.intval($this->pObj->id).
01287                                                 ' AND hidden=0'.
01288                                                 ' AND starttime<'.time()
01289                                 );
01290 
01291 
01292                 $rl = $this->getUidRootLineForClosestTemplate($this->pObj->id);
01293 
01294                 foreach($ftrows as $cfgRow)             {
01295                         switch($cfgRow['type']) {
01296                                 case 1:
01297                                         if ($cfgRow['table2index'] && isset($GLOBALS['TCA'][$cfgRow['table2index']]))   {
01298 
01299                                                         // Init:
01300                                                 $pid = intval($cfgRow['alternative_source_pid']) ? intval($cfgRow['alternative_source_pid']) : $this->pObj->id;
01301                                                 $fieldList = t3lib_div::trimExplode(',',$cfgRow['fieldlist'],1);
01302 
01303                                                         // Select
01304                                                 $recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
01305                                                                         '*',
01306                                                                         $cfgRow['table2index'],
01307                                                                         'pid = '.intval($pid)
01308                                                                 );
01309 
01310                                                         // Traverse:
01311                                                 foreach($recs as $r)    {
01312                                                                 // (Re)-Indexing a row from a table:
01313                                                         $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
01314                                                         parse_str(str_replace('###UID###',$r['uid'],$cfgRow['get_params']),$GETparams);
01315                                                         $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl, $GETparams, $cfgRow['chashcalc'] ? TRUE : FALSE);
01316                                                         $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);
01317 
01318                                                         $theContent = '';
01319                                                         foreach($fieldList as $k => $v) {
01320                                                                 if (!$k)        {
01321                                                                         $theTitle = $r[$v];
01322                                                                 } else {
01323                                                                         $theContent.= $r[$v].' ';
01324                                                                 }
01325                                                         }
01326 #debug($theContent,$theTitle);
01327                                                         $indexerObj->backend_indexAsTYPO3Page(
01328                                                                         $theTitle,
01329                                                                         '',
01330                                                                         '',
01331                                                                         $theContent,
01332                                                                         $GLOBALS['LANG']->charSet,
01333                                                                         $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['tstamp']],
01334                                                                         $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['crdate']],
01335                                                                         $r['uid']
01336                                                                 );
01337 
01338                                                 }
01339 #debug($recs);
01340                                         }
01341                                 break;
01342                                 case 2:
01343                                         $readpath = $cfgRow['filepath'];
01344                                         if (!t3lib_div::isAbsPath($readPath))   {
01345                                                 $readpath = t3lib_div::getFileAbsFileName($readpath);
01346                                         }
01347 #debug($readpath,'$readpath');
01348 
01349                                         if (t3lib_div::isAllowedAbsPath($readpath))     {
01350                                                 $extList = implode(',',t3lib_div::trimExplode(',',$cfgRow['extensions'],1));
01351                                                 $fileArr = array();
01352                                                 $files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,$cfgRow['depth']);
01353                                                 $files = t3lib_div::removePrefixPathFromList($files,PATH_site);
01354 #debug($files);
01355                                                 foreach($files as $path)        {
01356                                                                 // (Re)-Indexing file on page.
01357                                                         $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
01358                                                         $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl);
01359                                                         $indexerObj->backend_setFreeIndexUid($cfgRow['uid']);
01360                                                         $indexerObj->hash['phash'] = -1;        // EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!)
01361 
01362                                                         $indexerObj->indexRegularDocument($path, TRUE);
01363 
01364 #debug($indexerObj->internal_log,$resultRow['data_filename']);
01365 #debug($indexerObj->file_phash_arr,'file_phash_arr');
01366 #debug($indexerObj->hash,'hash');
01367 
01368                                                 }
01369                                         }
01370                                 break;
01371                                 case 3:
01372                                         if ($cfgRow['externalUrl'])     {
01373                                                 $this->indexExtUrlRecursively($cfgRow['externalUrl'], $cfgRow['depth'], $this->pObj->id, $rl, $cfgRow['uid']);
01374                                         }
01375                                 break;
01376                         }
01377                 }
01378         }
01379 
/**
 * Indexes an external URL and, while $depth is greater than zero, the documents it links to.
 *
 * Links without a scheme are resolved against the URL that was just indexed:
 * protocol-relative ("//host/x"), root-relative ("/x"), query-only ("?x") and
 * directory-relative ("x") references are supported; the port of the base URL
 * is preserved. Dot segments ("../") are left as they are.
 * Empty links, fragment-only links ("#x"), links that parse_url() cannot parse
 * and relative links that cannot be resolved (base URL has no scheme/host) are
 * not followed.
 *
 * @param       string          URL to index
 * @param       integer         Remaining recursion depth; links are only followed when this is greater than zero
 * @param       integer         Page id handed to backend_initIndexer()
 * @param       array           Rootline handed to backend_initIndexer()
 * @param       integer         Uid handed to backend_setFreeIndexUid()
 * @return      void
 */
function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid)    {

                // Index external URL:
        $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');
        $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl);
        $indexerObj->backend_setFreeIndexUid($cfgUid);

        $indexerObj->indexExternalUrl($url);

                // Recursion:
        if ($depth>0)   {

                        // Prepare the parts of the indexed URL that relative links are resolved against:
                $url_qParts = parse_url($url);
                $baseHost = '';         // "scheme://host[:port]"; stays empty if the indexed URL has no scheme/host
                $basePath = '/';        // Path of the indexed document
                if (is_array($url_qParts) && !empty($url_qParts['scheme']) && !empty($url_qParts['host']))      {
                        $baseHost = $url_qParts['scheme'].'://'.$url_qParts['host'].(isset($url_qParts['port']) ? ':'.$url_qParts['port'] : '');
                        if (!empty($url_qParts['path']))        {
                                $basePath = $url_qParts['path'];
                        }
                }
                        // Directory part of the document path, including the trailing slash:
                $baseDir = substr($basePath, 0, strrpos($basePath, '/')+1);

                $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content);
                if (!is_array($list))   {
                        $list = array();
                }

                        // Traverse links:
                foreach($list as $count => $linkInfo)   {

                                // Decode entities:
                        $linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']);

                                // Make the link absolute; an empty string afterwards means "do not follow":
                        $qParts = parse_url($linkSource);
                        if (!is_array($qParts)) {
                                $linkSource = '';
                        } elseif (empty($qParts['scheme']))     {
                                if ($baseHost==='' || $linkSource==='' || substr($linkSource,0,1)=='#') {
                                        $linkSource = '';
                                } elseif (substr($linkSource,0,2)=='//')        {
                                        $linkSource = $url_qParts['scheme'].':'.$linkSource;
                                } elseif (substr($linkSource,0,1)=='/') {
                                        $linkSource = $baseHost.$linkSource;
                                } elseif (substr($linkSource,0,1)=='?') {
                                        $linkSource = $baseHost.$basePath.$linkSource;
                                } else {
                                        $linkSource = $baseHost.$baseDir.$linkSource;
                                }
                        }

                        if ($linkSource!=='')   {
                                $this->indexExtUrlRecursively($linkSource, $depth-1, $pageId, $rl, $cfgUid);
                        }

                                // Temporary limit until we know how to handle hundreds of URLs with limited parsetime in PHP...
                                // Checked for skipped links as well, so the loop ends at the same list entry as before.
                        if ($count>3)   break;
                }
        }
}
01423 
01424 
01425 
01426 
01427 
01428 
01429 
01430 
01431 
01432 
01433 
01434 
01435         /********************************
01436          *
01437          * SQL functions
01438          *
01439          *******************************/
01440 
/**
 * Deletes all index rows registered for the given phash values.
 *
 * For every value in the list that is greater than zero after intval(), the
 * rows with that phash are deleted from index_phash, index_rel, index_section,
 * index_grlist, index_fulltext and index_debug. If $clearPageCache is set, the
 * cache_pages rows of the pages found in index_section for that phash are
 * deleted first.
 *
 * @param       string          Comma-separated list of phash values
 * @param       boolean         If set, the page cache of the affected pages is cleared as well
 * @return      void
 */
function removeIndexedPhashRow($phashList,$clearPageCache=1)    {
                // Tables holding rows keyed by phash:
        $indexTables = explode(',','index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug');

        foreach(t3lib_div::trimExplode(',',$phashList,1) as $hashValue) {
                $hashValue = intval($hashValue);

                        // Only positive hash values are processed:
                if ($hashValue<=0)      {
                        continue;
                }
                $hashWhere = 'phash='.$hashValue;

                        // Clearing page cache:
                if ($clearPageCache)    {
                        $sectionRes = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', $hashWhere);
                        if ($GLOBALS['TYPO3_DB']->sql_num_rows($sectionRes))    {
                                $pageIds = array();
                                while($sectionRow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($sectionRes)) {
                                        $pageIds[] = $sectionRow['page_id'];
                                }
                                $pageIdList = implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($pageIds));
                                $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.$pageIdList.')');
                        }
                }

                        // Removing old registrations for all tables.
                        // Note: index_section rows of external files whose phash_t3 points to this hash are not removed here.
                foreach($indexTables as $indexTable)    {
                        $GLOBALS['TYPO3_DB']->exec_DELETEquery($indexTable, $hashWhere);
                }
        }
}
01477 
/**
 * Returns the index_grlist rows of a phash, leaving out the first row whose
 * gr_list equals the given one.
 *
 * Only the first matching row is left out; further rows with the same gr_list
 * are returned like any other row.
 *
 * @param       integer         phash value (passed through intval() for the query)
 * @param       string          gr_list value of the row to leave out
 * @return      array           Remaining index_grlist rows, in the order they were fetched
 */
function getGrListEntriesForPhash($phash,$gr_list)      {
        $remaining = array();
        $matchDropped = FALSE;

        $grRes = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash));
        while($grRow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($grRes))   {
                        // Keep the row once a match was already dropped, or if it is not a match at all:
                if ($matchDropped || strcmp($grRow['gr_list'],$gr_list)!=0)     {
                        $remaining[] = $grRow;
                        continue;
                }
                $matchDropped = TRUE;
        }

        return $remaining;
}
01498 
/**
 * Updates the is_stopword flag of words in index_words.
 *
 * Nothing is done unless the backend user is an admin. The word ids come in
 * as array keys of submitted data, so each one is forced to an integer before
 * it is placed in the WHERE clause.
 *
 * @param       array           Word id (wid) => new is_stopword value
 * @return      void
 */
function processStopWords($stopWords)   {

        if ($GLOBALS['BE_USER']->isAdmin() && is_array($stopWords))     {
                        // Traverse words
                foreach($stopWords as $wid => $state)   {
                        $fieldArray = array(
                                'is_stopword' => $state
                        );
                                // intval() keeps arbitrary key content out of the SQL statement:
                        $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.intval($wid), $fieldArray);
                }
        }
}
01517 
/**
 * Adds keywords to and removes keywords from the "keywords" field of a page.
 *
 * The page's current comma-separated keyword list is read, each entry of
 * $pageKeywords is added (true value) or removed (false value), and the
 * resulting list is written back through t3lib_TCEmain as a ", "-separated
 * string.
 *
 * @param       array           Keyword => flag; a true flag adds the keyword, a false flag removes it
 * @param       integer         Uid of the page record to update
 * @return      void
 */
function processPageKeywords($pageKeywords, $pageUid)   {

                // Current keywords of the page, flipped so that the keywords are the array keys:
        $pageRow = t3lib_BEfunc::getRecord('pages', $pageUid);
        $keywordSet = array_flip(t3lib_div::trimExplode(',', $pageRow['keywords'], 1));

                // Apply the requested changes:
        foreach($pageKeywords as $word => $enabled)     {
                if (!$enabled)  {
                        unset($keywordSet[$word]);
                        continue;
                }
                $keywordSet[$word] = 1;
        }

                // Datamap holding the new keyword list for the page:
        $datamap = array(
                'pages' => array(
                        $pageUid => array(
                                'keywords' => implode(', ',array_keys($keywordSet))
                        )
                )
        );

                // Store it:
        $tce = t3lib_div::makeInstance('t3lib_TCEmain');
        $tce->stripslashes_values = 0;
        $tce->start($datamap,array());
        $tce->process_datamap();
}
01549 }
01550 
01551 
01552 
// XCLASS inclusion: if a file is registered for this script under the XCLASS
// key of $TYPO3_CONF_VARS for the current TYPO3_MODE, include it once.
if (defined('TYPO3_MODE'))      {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php'])      {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']);
    }
}
01556 
01557 ?>


Généré par Les spécialistes TYPO3 avec  doxygen 1.4.6