Documentation TYPO3 par Ameos |
00001 <?php 00002 /*************************************************************** 00003 * Copyright notice 00004 * 00005 * (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com) 00006 * All rights reserved 00007 * 00008 * This script is part of the TYPO3 project. The TYPO3 project is 00009 * free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 2 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * The GNU General Public License can be found at 00015 * http://www.gnu.org/copyleft/gpl.html. 00016 * 00017 * This script is distributed in the hope that it will be useful, 00018 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00019 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00020 * GNU General Public License for more details. 00021 * 00022 * This copyright notice MUST APPEAR in all copies of the script! 00023 ***************************************************************/ 00084 require_once(PATH_t3lib.'class.t3lib_pagetree.php'); 00085 require_once(PATH_t3lib.'class.t3lib_extobjbase.php'); 00086 require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php'); 00087 00088 00089 // ... all for the rootline! 00090 require_once (PATH_t3lib."class.t3lib_page.php"); 00091 require_once (PATH_t3lib."class.t3lib_tstemplate.php"); 00092 require_once (PATH_t3lib."class.t3lib_tsparser_ext.php"); 00093 00094 // Keywords mgm: 00095 require_once (PATH_t3lib."class.t3lib_tcemain.php"); 00096 00097 00098 00106 class tx_indexedsearch_modfunc1 extends t3lib_extobjbase { 00107 00108 // Internal, dynamic: 00109 var $allPhashListed = array(); // phash values accumulations for link to clear all 00110 var $external_parsers = array(); // External content parsers - objects set here with file extensions as keys. 00111 var $iconFileNameCache = array(); // File extensions - icon map/cache. 00112 var $indexerObj; // Indexer object 00113 00114 00120 function modMenu() { 00121 global $LANG; 00122 00123 return array ( 00124 'depth' => array( 00125 0 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_0'), 00126 1 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_1'), 00127 2 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_2'), 00128 3 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_3'), 00129 999 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_infi'), 00130 ), 00131 'type' => array( 00132 0 => 'Overview', 00133 1 => 'Technical Details', 00134 2 => 'Words and content', 00135 ) 00136 ); 00137 } 00138 00144 function main() { 00145 // Initializes the module. Done in this function because we may need to re-initialize if data is submitted! 00146 global $LANG,$TYPO3_CONF_VARS; 00147 00148 // Return if no page id: 00149 if ($this->pObj->id<=0) return; 00150 00151 // Initialize max-list items 00152 $this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100; 00153 00154 // Processing deletion of phash rows: 00155 if (t3lib_div::_GP('deletePhash')) { 00156 $this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash')); 00157 } 00158 00159 // Processing stop-words: 00160 if (t3lib_div::_POST('_stopwords')) { 00161 $this->processStopWords(t3lib_div::_POST('stopWord')); 00162 } 00163 00164 // Processing stop-words: 00165 if (t3lib_div::_POST('_pageKeywords')) { 00166 $this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid')); 00167 } 00168 00169 // Initialize external document parsers: 00170 // Example configuration, see ext_localconf.php of this file! 00171 if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) { 00172 foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) { 00173 $this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef); 00174 00175 // Init parser and if it returns false, unset its entry again: 00176 if (!$this->external_parsers[$extension]->softInit($extension)) { 00177 unset($this->external_parsers[$extension]); 00178 } 00179 } 00180 } 00181 00182 // Initialize indexer if we need it (metaphone display does...) 00183 $this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 00184 00185 // Set CSS styles specific for this document: 00186 $this->pObj->content = str_replace('/*###POSTCSSMARKER###*/',' 00187 TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; } 00188 ',$this->pObj->content); 00189 00190 00191 // Check if details for a phash record should be shown: 00192 if (t3lib_div::_GET('phash')) { 00193 00194 // Show title / function menu: 00195 $theOutput.=$this->pObj->doc->spacer(5); 00196 $theOutput.=$this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1); 00197 } elseif (t3lib_div::_GET('wid')) { 00198 00199 // Show title / function menu: 00200 $theOutput.=$this->pObj->doc->spacer(5); 00201 $theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1); 00202 } elseif (t3lib_div::_GET('metaphone')) { 00203 00204 // Show title / function menu: 00205 $theOutput.=$this->pObj->doc->spacer(5); 00206 $theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1); 00207 } elseif (t3lib_div::_GET('reindex')) { 00208 00209 // Show title / function menu: 00210 $theOutput.=$this->pObj->doc->spacer(5); 00211 $theOutput.=$this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1); 00212 } else { // Detail listings: 00213 // Depth function menu: 00214 $h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php'); 00215 $h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php'); 00216 00217 // Show title / function menu: 00218 $theOutput.=$this->pObj->doc->spacer(5); 00219 $theOutput.=$this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1); 00220 00221 $theOutput.=$this->drawTableOfIndexedPages(); 00222 } 00223 00224 return $theOutput; 00225 } 00226 00227 00228 00229 00230 00231 00232 00233 00234 00235 00236 00237 /******************************* 00238 * 00239 * Drawing table of indexed pages 00240 * 00241 ******************************/ 00242 00248 function drawTableOfIndexedPages() { 00249 global $BACK_PATH; 00250 00251 // Drawing tree: 00252 $tree = t3lib_div::makeInstance('t3lib_pageTree'); 00253 $perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1); 00254 $tree->init('AND '.$perms_clause); 00255 00256 $HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />'; 00257 $tree->tree[] = Array( 00258 'row' => $this->pObj->pageinfo, 00259 'HTML' => $HTML 00260 ); 00261 00262 if ($this->pObj->MOD_SETTINGS['depth']) { 00263 $tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], ''); 00264 } 00265 00266 // Traverse page tree: 00267 $code = ''; 00268 foreach($tree->tree as $data) { 00269 $code.= $this->indexed_info( 00270 $data['row'], 00271 $data['HTML']. 00272 $this->showPageDetails(t3lib_div::fixed_lgd($data['row']['title'], 20),$data['row']['uid']) 00273 ); 00274 } 00275 00276 if ($code) { 00277 $code = '<br/><br/> 00278 <table border="0" cellspacing="1" cellpadding="2" class="c-list">'. 00279 $this->printPhashRowHeader(). 00280 $code. 00281 '</table>'; 00282 00283 // Create section to output: 00284 $theOutput.=$this->pObj->doc->section('',$code,0,1); 00285 } else { 00286 $theOutput.=$this->pObj->doc->section('','<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>',0,1); 00287 } 00288 00289 return $theOutput; 00290 } 00291 00299 function indexed_info($data, $firstColContent) { 00300 00301 // Query: 00302 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( 00303 'ISEC.*, IP.*, count(*) AS count_val', 00304 'index_phash IP, index_section ISEC', 00305 'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']), 00306 'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId', 00307 'IP.item_type, IP.tstamp', 00308 ($this->maxListPerPage+1) 00309 ); 00310 00311 // Initialize variables: 00312 $rowCount = 0; 00313 $lines = array(); // Collecting HTML rows here. 00314 $phashAcc = array(); // Collecting phash values (to remove local indexing for) 00315 $phashAcc[] = 0; 00316 00317 // Traverse the result set of phash rows selected: 00318 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 00319 if ($rowCount == $this->maxListPerPage) { 00320 $rowCount++; // Increase to the extra warning row will appear as well. 00321 break; 00322 } 00323 00324 // Adds a display row: 00325 $lines[$row['phash_grouping']][] = $this->printPhashRow( 00326 $row, 00327 isset($lines[$row['phash_grouping']]), 00328 $this->getGrListEntriesForPhash($row['phash'], $row['gr_list']) 00329 ); 00330 $rowCount++; 00331 $phashAcc[] = $row['phash']; 00332 $this->allPhashListed[] = $row['phash']; // For removing all shown phash rows. 00333 } 00334 00335 // Compile rows into the table: 00336 $out = ''; 00337 $cellAttrib = ($data['_CSSCLASS'] ? ' class="'.$data['_CSSCLASS'].'"' : ''); 00338 if (count($lines)) { 00339 $firstColContent = '<td rowspan="'.$rowCount.'"'.$cellAttrib.'>'.$firstColContent.'</td>'; 00340 foreach($lines as $rowSet) { 00341 foreach($rowSet as $rows) { 00342 $out.=' 00343 <tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>'; 00344 00345 $firstColContent = ''; 00346 } 00347 } 00348 00349 if ($rowCount > $this->maxListPerPage) { // Now checking greater than, because we increased $rowCount before... 00350 $out.=' 00351 <tr class="bgColor-20"> 00352 <td> </td> 00353 <td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td> 00354 </tr>'; 00355 } 00356 } else { 00357 $out.=' 00358 <tr class="bgColor-20"> 00359 <td'.$cellAttrib.'>'.$firstColContent.'</td> 00360 <td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td> 00361 </tr>'; 00362 } 00363 00364 // Checking for phash-rows which are NOT joined with the section table: 00365 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('IP.*', 'index_phash IP', 'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',$phashAcc).')'); 00366 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 00367 $out.=' 00368 <tr class="typo3-red"> 00369 <td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td> 00370 </tr>'; 00371 $this->allPhashListed[] = $row['phash']; 00372 } 00373 00374 return $out; 00375 } 00376 00386 function printPhashRow($row,$grouping=0,$extraGrListRows) { 00387 $lines = array(); 00388 00389 // Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong! 00390 $titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : ''); 00391 00392 if ($row['item_type']) { 00393 $arr = unserialize($row['cHashParams']); 00394 $page = $arr['key'] ? ' ['.$arr['key'].']' : ''; 00395 } else $page = ''; 00396 $elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']); 00397 $cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element'); 00398 00399 switch($this->pObj->MOD_SETTINGS['type']) { 00400 case 1: // Technical details: 00401 // Display icon: 00402 if (!$grouping) { 00403 $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>'; 00404 } else { 00405 $lines[] = '<td> </td>'; 00406 } 00407 00408 // Title displayed: 00409 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>'; 00410 00411 // Remove-indexing-link: 00412 $lines[] = '<td>'.$cmdLinks.'</td>'; 00413 00414 // Various data: 00415 $lines[] = '<td>'.$row['phash'].'</td>'; 00416 $lines[] = '<td>'.$row['contentHash'].'</td>'; 00417 00418 if ($row['item_type']==='0') { 00419 $lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : ' ').'</td>'; 00420 $lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : ' ').'</td>'; 00421 $lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : ' ').'</td>'; 00422 $lines[] = '<td>'.($row['data_page_mp'] ? $row['data_page_mp'] : ' ').'</td>'; 00423 } else { 00424 $lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>'; 00425 } 00426 $lines[] = '<td>'.$row['gr_list'].$this->printExtraGrListRows($extraGrListRows).'</td>'; 00427 $lines[] = '<td>'.$this->printRootlineInfo($row).'</td>'; 00428 $lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : ' ').'</td>'; 00429 $lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : ' ').'</td>'; 00430 $lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'].($row['freeIndexSetId']?'/'.$row['freeIndexSetId']:'') : ' ').'</td>'; 00431 $lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : ' ').'</td>'; 00432 00433 00434 00435 // cHash parameters: 00436 $arr = unserialize($row['cHashParams']); 00437 if (is_array($arr)) { 00438 $theCHash = $arr['cHash']; 00439 unset($arr['cHash']); 00440 } 00441 00442 if ($row['item_type']) { // pdf... 00443 $lines[] = '<td>'.($arr['key'] ? 'Page '.$arr['key'] : '').' </td>'; 00444 } elseif ($row['item_type']==0) { 00445 $lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$arr)).' </td>'; 00446 } else { 00447 $lines[] = '<td class="bgColor"> </td>'; 00448 } 00449 00450 $lines[] = '<td>'.$theCHash.'</td>'; 00451 break; 00452 case 2: // Words and content: 00453 // Display icon: 00454 if (!$grouping) { 00455 $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>'; 00456 } else { 00457 $lines[] = '<td> </td>'; 00458 } 00459 00460 // Title displayed: 00461 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>'; 00462 00463 // Remove-indexing-link: 00464 $lines[] = '<td>'.$cmdLinks.'</td>'; 00465 00466 // Query: 00467 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00468 '*', 00469 'index_fulltext', 00470 'phash = '.intval($row['phash']) 00471 ); 00472 $lines[] = '<td style="white-space: normal;">'. 00473 htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($ftrows[0]['fulltextdata']),3000)). 00474 '<hr/><em>Size: '.strlen($ftrows[0]['fulltextdata']).'</em>'. 00475 '</td>'; 00476 00477 // Query: 00478 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00479 'index_words.baseword, index_rel.*', 00480 'index_rel, index_words', 00481 'index_rel.phash = '.intval($row['phash']). 00482 ' AND index_words.wid = index_rel.wid', 00483 '', 00484 '', 00485 '', 00486 'baseword' 00487 ); 00488 00489 $wordList = ''; 00490 if (is_array($ftrows)) { 00491 $indexed_words = array_keys($ftrows); 00492 sort($indexed_words); 00493 $wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words))); 00494 $wordList.='<hr/><em>Count: '.count($indexed_words).'</em>'; 00495 } 00496 00497 $lines[] = '<td style="white-space: normal;">'.$wordList.'</td>'; 00498 break; 00499 default: // Overview 00500 // Display icon: 00501 if (!$grouping) { 00502 $lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>'; 00503 } else { 00504 $lines[] = '<td> </td>'; 00505 } 00506 00507 // Title displayed: 00508 $lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>'; 00509 00510 // Remove-indexing-link: 00511 $lines[] = '<td>'.$cmdLinks.'</td>'; 00512 00513 $lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>'; 00514 $lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>'; 00515 $lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>'; 00516 break; 00517 } 00518 00519 return $lines; 00520 } 00521 00527 function printPhashRowHeader() { 00528 $lines = array(); 00529 00530 switch($this->pObj->MOD_SETTINGS['type']) { 00531 case 1: 00532 $lines[] = '<td> </td>'; 00533 $lines[] = '<td> </td>'; 00534 $lines[] = '<td>Title</td>'; 00535 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed('ALL','Clear ALL phash-rows below!').'</td>'; 00536 00537 $lines[] = '<td>pHash</td>'; 00538 $lines[] = '<td>cHash</td>'; 00539 $lines[] = '<td>&id</td>'; 00540 $lines[] = '<td>&type</td>'; 00541 $lines[] = '<td>&L</td>'; 00542 $lines[] = '<td>&MP</td>'; 00543 $lines[] = '<td>grlist</td>'; 00544 $lines[] = '<td>Rootline</td>'; 00545 $lines[] = '<td>page_id</td>'; 00546 $lines[] = '<td>phash_t3</td>'; 00547 $lines[] = '<td>CfgUid</td>'; 00548 $lines[] = '<td>RecUid</td>'; 00549 $lines[] = '<td>GET-parameters</td>'; 00550 $lines[] = '<td>&cHash</td>'; 00551 break; 00552 case 2: 00553 $lines[] = '<td> </td>'; 00554 $lines[] = '<td> </td>'; 00555 $lines[] = '<td>Title</td>'; 00556 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed('ALL','Clear ALL phash-rows below!').'</td>'; 00557 $lines[] = '<td>Content<br/> 00558 <img src="clear.gif" width="300" height="1" alt="" /></td>'; 00559 $lines[] = '<td>Words<br/> 00560 <img src="clear.gif" width="300" height="1" alt="" /></td>'; 00561 break; 00562 default: 00563 $lines[] = '<td> </td>'; 00564 $lines[] = '<td> </td>'; 00565 $lines[] = '<td>Title</td>'; 00566 $lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed('ALL','Clear ALL phash-rows below!').'</td>'; 00567 $lines[] = '<td>Description</td>'; 00568 $lines[] = '<td>Size</td>'; 00569 $lines[] = '<td>Indexed:</td>'; 00570 break; 00571 } 00572 00573 $out = '<tr class="tableheader bgColor5">'.implode('',$lines).'</tr>'; 00574 return $out; 00575 } 00576 00582 function returnNumberOfColumns() { 00583 switch($this->pObj->MOD_SETTINGS['type']) { 00584 case 1: 00585 return 18; 00586 break; 00587 case 2: 00588 return 6; 00589 break; 00590 default: 00591 return 7; 00592 break; 00593 } 00594 } 00595 00596 00597 00598 00599 00600 00601 00602 00603 00604 00605 00606 /******************************* 00607 * 00608 * Details display, phash row 00609 * 00610 *******************************/ 00611 00618 function showDetailsForPhash($phash) { 00619 00620 $content = ''; 00621 00622 // Selects the result row: 00623 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00624 '*', 00625 'index_phash', 00626 'phash = '.intval($phash) 00627 ); 00628 $phashRecord = $ftrows[0]; 00629 00630 // If found, display: 00631 if (is_array($phashRecord)) { 00632 $content.= '<h4>phash row content:</h4>'. 00633 $this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord)); 00634 00635 // Getting debug information if any: 00636 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00637 '*', 00638 'index_debug', 00639 'phash = '.intval($phash) 00640 ); 00641 if (is_array($ftrows)) { 00642 $debugInfo = unserialize($ftrows[0]['debuginfo']); 00643 $lexer = $debugInfo['lexer']; 00644 unset($debugInfo['lexer']); 00645 00646 $content.= '<h3>Debug information:</h3>'. 00647 $this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo)); 00648 00649 $content.= '<h4>Debug information / lexer splitting:</h4>'. 00650 '<hr/><b>'. 00651 $this->utf8_to_currentCharset($lexer). 00652 '</b><hr/>'; 00653 } 00654 00655 00656 00657 $content.='<h3>Word statistics</h3>'; 00658 00659 // Finding all words for this phash: 00660 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00661 'index_words.*, index_rel.*', 00662 'index_rel, index_words', 00663 'index_rel.phash = '.intval($phash). 00664 ' AND index_words.wid = index_rel.wid', 00665 '', 00666 'index_words.baseword', 00667 '' 00668 ); 00669 $pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']); 00670 $showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin(); 00671 $content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec); 00672 00673 // Group metaphone hash: 00674 $metaphone = array(); 00675 foreach($ftrows as $row) { 00676 $metaphone[$row['metaphone']][] = $row['baseword']; 00677 } 00678 $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:'); 00679 00680 // Finding top-20 on frequency for this phash: 00681 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00682 'index_words.baseword, index_words.metaphone, index_rel.*', 00683 'index_rel, index_words', 00684 'index_rel.phash = '.intval($phash). 00685 ' AND index_words.wid = index_rel.wid 00686 AND index_words.is_stopword=0', 00687 '', 00688 'index_rel.freq DESC', 00689 '20' 00690 ); 00691 $content.= $this->listWords($ftrows, 'Top-20 words by frequency:', 2); 00692 00693 // Finding top-20 on count for this phash: 00694 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00695 'index_words.baseword, index_words.metaphone, index_rel.*', 00696 'index_rel, index_words', 00697 'index_rel.phash = '.intval($phash). 00698 ' AND index_words.wid = index_rel.wid 00699 AND index_words.is_stopword=0', 00700 '', 00701 'index_rel.count DESC', 00702 '20' 00703 ); 00704 $content.= $this->listWords($ftrows, 'Top-20 words by count:', 2); 00705 00706 00707 $content.='<h3>Section records for this phash</h3>'; 00708 00709 // Finding sections for this record: 00710 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00711 '*', 00712 'index_section', 00713 'index_section.phash = '.intval($phash), 00714 '', 00715 '', 00716 '' 00717 ); 00718 $content.= t3lib_div::view_array($ftrows); 00719 00720 // Add go-back link: 00721 $content = $this->linkList().$content.$this->linkList(); 00722 00723 } else $content.= 'Error: No phash row found'; 00724 00725 return $content; 00726 } 00727 00737 function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='') { 00738 00739 // Prepare keywords: 00740 $keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : ''; 00741 00742 // Render list: 00743 $trows = ''; 00744 $trows.= ' 00745 <tr class="tableheader bgColor5"> 00746 '.($stopWordBoxes ? '<td>'.htmlspecialchars('Stopword:').'</td>' : '').' 00747 <td>'.htmlspecialchars('Word:').'</td> 00748 <td>'.htmlspecialchars('Count:').'</td> 00749 <td>'.htmlspecialchars('First:').'</td> 00750 <td>'.htmlspecialchars('Frequency:').'</td> 00751 <td>'.htmlspecialchars('Flags:').'</td> 00752 '.(is_array($keywords) ? '<td>'.htmlspecialchars('Page keyword:').'</td>' : '').' 00753 </tr> 00754 '; 00755 foreach($ftrows as $row) { 00756 $hiddenField = $stopWordBoxes!=2 ? '<input type="hidden" name="stopWord['.$row['wid'].']" value="0" />' : ''; 00757 $trows.= ' 00758 <tr class="'.($row['is_stopword'] ? 'bgColor' : 'bgColor4').'"> 00759 '.($stopWordBoxes ? '<td align="center"'.($row['is_stopword'] ? ' style="background-color:red;"' : '').'>'.$hiddenField.'<input type="checkbox" name="stopWord['.$row['wid'].']" value="1"'.($row['is_stopword']?'checked="checked"':'').' /></td>' : '').' 00760 <td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td> 00761 <td>'.htmlspecialchars($row['count']).'</td> 00762 <td>'.htmlspecialchars($row['first']).'</td> 00763 <td>'.htmlspecialchars($row['freq']).'</td> 00764 <td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td> 00765 '.(is_array($keywords) ? '<td align="center"'.(isset($keywords[$row['baseword']]) ? ' class="bgColor2"' : '').'><input type="hidden" name="pageKeyword['.$row['baseword'].']" value="0" /><input type="checkbox" name="pageKeyword['.$row['baseword'].']" value="1"'.(isset($keywords[$row['baseword']])?'checked="checked"':'').' /></td>' : '').' 00766 </tr> 00767 '; 00768 } 00769 00770 return '<h4>'.htmlspecialchars($header).'</h4>'. 00771 ' 00772 <table border="0" cellspacing="1" cellpadding="2" class="c-list"> 00773 '.$trows.' 00774 </table>'. 00775 ($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : ''). 00776 (is_array($keywords) ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.$page['uid'].'" />'. 00777 '<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>' : ''); 00778 } 00779 00787 function listMetaphoneStat($ftrows,$header) { 00788 00789 $trows = ''; 00790 $trows.= ' 00791 <tr class="tableheader bgColor5"> 00792 <td>'.htmlspecialchars('Metaphone:').'</td> 00793 <td>'.htmlspecialchars('Hash:').'</td> 00794 <td>'.htmlspecialchars('Count:').'</td> 00795 <td>'.htmlspecialchars('Words:').'</td> 00796 </tr> 00797 '; 00798 foreach($ftrows as $metaphone => $words) { 00799 if (count($words)>1) { 00800 $trows.= ' 00801 <tr class="bgColor4"> 00802 <td>'.$this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone).'</td> 00803 <td>'.htmlspecialchars($metaphone).'</td> 00804 <td>'.htmlspecialchars(count($words)).'</td> 00805 <td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset(implode(', ',$words))).'</td> 00806 </tr> 00807 '; 00808 } 00809 } 00810 00811 return '<h4>'.htmlspecialchars($header).'</h4>'. 00812 '<table border="0" cellspacing="1" cellpadding="2" class="c-list"> 00813 '.$trows.' 00814 </table>'; 00815 } 00816 00824 function linkWordDetails($string,$wid) { 00825 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('wid'=>$wid,'phash'=>''))).'">'.$string.'</a>'; 00826 } 00827 00828 00836 function linkMetaPhoneDetails($string,$metaphone) { 00837 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('metaphone'=>$metaphone,'wid'=>'','phash'=>''))).'">'.$string.'</a>'; 00838 } 00839 00846 function flagsMsg($flags) { 00847 if ($flags > 0) { 00848 return 00849 ($flags & 128 ? '<title>' : ''). // pow(2,7) 00850 ($flags & 64 ? '<meta/keywords>' : ''). // pow(2,6) 00851 ($flags & 32 ? '<meta/description>' : ''). // pow(2,5) 00852 ' ('.$flags.')'; 00853 } 00854 } 00855 00856 00857 00858 00859 00860 00861 00862 00863 00864 00865 /******************************* 00866 * 00867 * Details display, words / metaphone 00868 * 00869 *******************************/ 00870 00877 function showDetailsForWord($wid) { 00878 00879 // Select references to this word 00880 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00881 'index_phash.*, index_section.*, index_rel.*', 00882 'index_rel, index_section, index_phash', 00883 'index_rel.wid = '.intval($wid). 00884 ' AND index_rel.phash = index_section.phash'. 00885 ' AND index_section.phash = index_phash.phash', 00886 '', 00887 'index_rel.freq DESC', 00888 '' 00889 ); 00890 00891 // Headers: 00892 $content.=' 00893 <tr class="tableheader bgColor5"> 00894 <td>phash</td> 00895 <td>page_id</td> 00896 <td>data_filename</td> 00897 <td>count</td> 00898 <td>first</td> 00899 <td>freq</td> 00900 <td>flags</td> 00901 </tr>'; 00902 00903 if (is_array($ftrows)) { 00904 foreach($ftrows as $wDat) { 00905 $content.=' 00906 <tr class="bgColor4"> 00907 <td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td> 00908 <td>'.htmlspecialchars($wDat['page_id']).'</td> 00909 <td>'.htmlspecialchars($wDat['data_filename']).'</td> 00910 <td>'.htmlspecialchars($wDat['count']).'</td> 00911 <td>'.htmlspecialchars($wDat['first']).'</td> 00912 <td>'.htmlspecialchars($wDat['freq']).'</td> 00913 <td>'.htmlspecialchars($wDat['flags']).'</td> 00914 </tr>'; 00915 } 00916 } 00917 00918 // Compile table: 00919 $content = ' 00920 <table border="0" cellspacing="1" cellpadding="2" class="c-list">'. 00921 $content.' 00922 </table>'; 00923 00924 // Add go-back link: 00925 $content = $content.$this->linkList(); 00926 00927 return $content; 00928 } 00929 00936 function showDetailsForMetaphone($metaphone) { 00937 00938 // Finding top-20 on frequency for this phash: 00939 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 00940 'index_words.*', 00941 'index_words', 00942 'index_words.metaphone = '.intval($metaphone), 00943 '', 00944 'index_words.baseword', 00945 '' 00946 ); 00947 00948 if (count($ftrows)) { 00949 $content.='<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>'; 00950 00951 $content.=' 00952 <tr class="tableheader bgColor5"> 00953 <td>Word</td> 00954 <td>Is stopword?</td> 00955 </tr>'; 00956 00957 if (is_array($ftrows)) { 00958 foreach($ftrows as $wDat) { 00959 $content.=' 00960 <tr class="bgColor4"> 00961 <td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td> 00962 <td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td> 00963 </tr>'; 00964 } 00965 } 00966 00967 $content = ' 00968 <table border="0" cellspacing="1" cellpadding="2" class="c-list">'. 00969 $content.' 00970 </table>'; 00971 00972 if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone) { 00973 $content.='ERROR: Metaphone string and hash did not match for some reason!?'; 00974 } 00975 00976 // Add go-back link: 00977 $content = $content.$this->linkList(); 00978 } 00979 00980 return $content; 00981 } 00982 00983 00984 00985 00986 00987 00988 00989 00990 00991 00992 00993 00994 /******************************* 00995 * 00996 * Helper functions 00997 * 00998 *******************************/ 00999 01007 function printRemoveIndexed($phash,$alt) { 01008 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash))).'">'. 01009 '<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />'. 01010 '</a>'; 01011 } 01012 01020 function printReindex($resultRow,$alt) { 01021 if ($resultRow['item_type'] && $resultRow['item_type']!=='0') { 01022 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('reindex'=>$resultRow['phash'],'reindex_id'=>$resultRow['page_id']))).'">'. 01023 '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />'. 01024 '</a>'; 01025 } 01026 } 01027 01035 function linkDetails($string,$phash) { 01036 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash))).'">'.$string.'</a>'; 01037 } 01038 01044 function linkList() { 01045 return '<br/><a href="index.php?id='.$this->pObj->id.'">Back to list.</a><br/>'; 01046 } 01047 01055 function showPageDetails($string,$id) { 01056 return '<a href="'.htmlspecialchars('index.php?id='.$id.'&SET[depth]=0&SET[type]=1').'">'.$string.'</a>'; 01057 } 01058 01065 function printExtraGrListRows($extraGrListRows) { 01066 if (count($extraGrListRows)) { 01067 reset($extraGrListRows); 01068 $lines=array(); 01069 while(list(,$r)=each($extraGrListRows)) { 01070 $lines[] = $r['gr_list']; 01071 } 01072 return "<br/>".$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines)); 01073 } 01074 } 01075 01082 function printRootlineInfo($row) { 01083 $uidCollection = array(); 01084 01085 if ($row['rl0']) { 01086 $uidCollection[0] = $row['rl0']; 01087 if ($row['rl1']) { 01088 $uidCollection[1] = $row['rl1']; 01089 if ($row['rl2']) { 01090 $uidCollection[2] = $row['rl2']; 01091 01092 // Additional levels: 01093 if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) { 01094 foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) { 01095 if ($row[$fieldName]) { 01096 $uidCollection[$rootLineLevel] = $row[$fieldName]; 01097 } 01098 } 01099 } 01100 } 01101 } 01102 } 01103 01104 // Return root line. 01105 ksort($uidCollection); 01106 return implode('/',$uidCollection); 01107 } 01108 01116 function makeItemTypeIcon($it,$alt='') { 01117 if (!isset($this->iconFileNameCache[$it])) { 01118 if ($it==='0') { 01119 $icon = 'EXT:indexed_search/pi/res/pages.gif'; 01120 } elseif ($this->external_parsers[$it]) { 01121 $icon = $this->external_parsers[$it]->getIcon($it); 01122 } 01123 01124 $fullPath = t3lib_div::getFileAbsFileName($icon); 01125 01126 if ($fullPath) { 01127 $info = @getimagesize($fullPath); 01128 $iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site)); 01129 $this->iconFileNameCache[$it] = is_array($info) ? '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />' : ''; 01130 } 01131 } 01132 return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]); 01133 } 01134 01141 function utf8_to_currentCharset($string) { 01142 global $LANG; 01143 if ($LANG->charSet != 'utf-8') { 01144 $string = $LANG->csConvObj->utf8_decode($string, $LANG->charSet, TRUE); 01145 } 01146 return $string; 01147 } 01148 01149 01150 01151 01152 01153 01154 01155 01156 01157 01158 01159 01160 /******************************** 01161 * 01162 * Reindexing 01163 * 01164 *******************************/ 01165 01173 function reindexPhash($phash, $pageId) { 01174 01175 // Query: 01176 list($resultRow) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 01177 'ISEC.*, IP.*', 01178 'index_phash IP, index_section ISEC', 01179 'IP.phash = ISEC.phash 01180 AND IP.phash = '.intval($phash).' 01181 AND ISEC.page_id = '.intval($pageId) 01182 ); 01183 01184 $content = ''; 01185 if (is_array($resultRow)) { 01186 if ($resultRow['item_type'] && $resultRow['item_type']!=='0') { 01187 01188 // (Re)-Indexing file on page. 01189 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 01190 $indexerObj->backend_initIndexer($pageId, 0, 0, '', $this->getUidRootLineForClosestTemplate($pageId)); 01191 01192 // URL or local file: 01193 if ($resultRow['externalUrl']) { 01194 $indexerObj->indexExternalUrl($resultRow['data_filename']); 01195 } else { 01196 $indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE); 01197 } 01198 01199 if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash']) { 01200 $content.= 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!'; 01201 } 01202 01203 $content.='<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>'; 01204 $content.=t3lib_div::view_array($indexerObj->internal_log); 01205 01206 $content.='<h4>Hash-array, page:</h4>'; 01207 $content.=t3lib_div::view_array($indexerObj->hash); 01208 01209 $content.='<h4>Hash-array, file:</h4>'; 01210 $content.=t3lib_div::view_array($indexerObj->file_phash_arr); 01211 } 01212 } 01213 01214 // Link back to list. 01215 $content.= $this->linkList(); 01216 01217 return $content; 01218 } 01219 01227 function getUidRootLineForClosestTemplate($id) { 01228 $tmpl = t3lib_div::makeInstance('t3lib_tsparser_ext'); // Defined global here! 01229 $tmpl->tt_track = 0; // Do not log time-performance information 01230 $tmpl->init(); 01231 01232 // Gets the rootLine 01233 $sys_page = t3lib_div::makeInstance('t3lib_pageSelect'); 01234 $rootLine = $sys_page->getRootLine($id); 01235 $tmpl->runThroughTemplates($rootLine,0); // This generates the constants/config + hierarchy info for the template. 01236 01237 // Root line uids 01238 $rootline_uids = array(); 01239 foreach($tmpl->rootLine as $rlkey => $rldat) { 01240 $rootline_uids[$rlkey] = $rldat['uid']; 01241 } 01242 01243 return $rootline_uids; 01244 } 01245 01246 01247 01248 01249 01250 01251 01252 01253 01254 01255 01256 01257 /******************************** 01258 * 01259 * SQL functions 01260 * 01261 *******************************/ 01262 01270 function removeIndexedPhashRow($phashList,$clearPageCache=1) { 01271 // FIXME: This is only a workaround 01272 if ($phashList=='ALL') { 01273 $this->drawTableOfIndexedPages(); 01274 $phashRows = $this->allPhashListed; 01275 $this->allPhashListed = array(); // Reset it because it will be filled again later... 01276 } else { 01277 $phashRows = t3lib_div::trimExplode(',',$phashList,1); 01278 } 01279 01280 foreach($phashRows as $phash) { 01281 $phash = intval($phash); 01282 if ($phash>0) { 01283 01284 if ($clearPageCache) { 01285 // Clearing page cache: 01286 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.intval($phash)); 01287 if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { 01288 $idList = array(); 01289 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01290 $idList[] = $row['page_id']; 01291 } 01292 $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($idList)).')'); 01293 } 01294 } 01295 01296 // Removing old registrations for all tables. 01297 $tableArr = explode(',','index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug'); 01298 foreach($tableArr as $table) { 01299 $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); 01300 } 01301 01302 // Did not remove any index_section records for external files where phash_t3 points to this hash! 01303 } 01304 } 01305 } 01306 01314 function getGrListEntriesForPhash($phash,$gr_list) { 01315 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash)); 01316 $lines = array(); 01317 $isRemoved = 0; 01318 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01319 if (!$isRemoved && !strcmp($row['gr_list'],$gr_list)) { 01320 $isRemoved = 1; 01321 } else { 01322 $lines[] = $row; 01323 } 01324 } 01325 return $lines; 01326 } 01327 01334 function processStopWords($stopWords) { 01335 01336 if ($GLOBALS['BE_USER']->isAdmin()) { 01337 // Traverse words 01338 foreach($stopWords as $wid => $state) { 01339 $fieldArray = array( 01340 'is_stopword' => $state 01341 ); 01342 $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.$wid, $fieldArray); 01343 } 01344 } 01345 } 01346 01354 function processPageKeywords($pageKeywords, $pageUid) { 01355 01356 // Get pages current keywords 01357 $pageRec = t3lib_BEfunc::getRecord('pages', $pageUid); 01358 $keywords = array_flip(t3lib_div::trimExplode(',', $pageRec['keywords'], 1)); 01359 01360 // Merge keywords: 01361 foreach($pageKeywords as $key => $v) { 01362 if ($v) { 01363 $keywords[$key]=1; 01364 } else { 01365 unset($keywords[$key]); 01366 } 01367 } 01368 01369 // Compile new list: 01370 $data = array(); 01371 $data['pages'][$pageUid]['keywords'] = implode(', ',array_keys($keywords)); 01372 01373 $tce = t3lib_div::makeInstance('t3lib_TCEmain'); 01374 $tce->stripslashes_values = 0; 01375 $tce->start($data,array()); 01376 $tce->process_datamap(); 01377 } 01378 } 01379 01380 01381 01382 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']) { 01383 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']); 01384 } 01385 01386 ?>