Documentation TYPO3 par Ameos |
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 2001-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

require_once(PATH_t3lib.'class.t3lib_pagetree.php');
require_once(PATH_t3lib.'class.t3lib_extobjbase.php');
require_once(t3lib_extMgm::extPath('indexed_search').'class.indexer.php');


	// ... all for the rootline!
require_once (PATH_t3lib."class.t3lib_page.php");
require_once (PATH_t3lib."class.t3lib_tstemplate.php");
require_once (PATH_t3lib."class.t3lib_tsparser_ext.php");

	// Keywords mgm:
require_once (PATH_t3lib."class.t3lib_tcemain.php");



/**
 * Backend module function that lists the indexed_search index rows
 * (index_phash / index_section / index_rel / index_words) for a page tree
 * and offers detail views per phash row, word and metaphone value.
 */
class tx_indexedsearch_modfunc1 extends t3lib_extobjbase {

		// Internal, dynamic:
	var $allPhashListed = array();		// phash values accumulations for link to clear all
	var $external_parsers = array();	// External content parsers - objects set here with file extensions as keys.
	var $iconFileNameCache = array();	// File extensions - icon map/cache.
	var $indexerObj;					// Indexer object
	var $maxListPerPage = 100;			// Max number of phash rows listed per page; (re)set in main() depending on the "listALL" parameter.


	/**
	 * Returns the function menu configuration of this module function:
	 * the "depth" selector (labels from locallang_core) and the "type" selector.
	 *
	 * @return	array		Menu array with the keys "depth" and "type"
	 */
	function modMenu()	{
		global $LANG;

		return array (
			'depth' => array(
				0 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_0'),
				1 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_1'),
				2 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_2'),
				3 => $LANG->sL('LLL:EXT:lang/locallang_core.php:labels.depth_3'),
			),
			'type' => array(
				0 => 'Overview',
				1 => 'Technical Details',
				2 => 'Words and content',
					// Disabled menu entry; main() still routes any type outside 0,1,2 to extraIndexing():
//				3 => 'Indexing'
			)
		);
	}

	/**
	 * Main entry point: processes submitted commands (phash deletion, stop words,
	 * page keywords), initializes external parsers and the indexer object and
	 * renders either one of the detail views or the table of indexed pages.
	 *
	 * @return	string		HTML output. Nothing (NULL) is returned if the parent object has no positive page id.
	 */
	function main()	{
			// Initializes the module. Done in this function because we may need to re-initialize if data is submitted!
		global $LANG, $TYPO3_CONF_VARS;

			// Return if no page id:
		if ($this->pObj->id<=0)		return;

			// Initialize max-list items
		$this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100;

			// Processing deletion of phash rows:
		if (t3lib_div::_GP('deletePhash'))	{
			$this->removeIndexedPhashRow(t3lib_div::_GP('deletePhash'));
		}

			// Processing stop-words:
		if (t3lib_div::_POST('_stopwords'))	{
			$this->processStopWords(t3lib_div::_POST('stopWord'));
		}

			// Processing page keywords:
		if (t3lib_div::_POST('_pageKeywords'))	{
			$this->processPageKeywords(t3lib_div::_POST('pageKeyword'), t3lib_div::_POST('pageKeyword_pageUid'));
		}

			// Initialize external document parsers:
			// Example configuration, see ext_localconf.php of this file!
		if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))	{
			foreach($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef)	{
				$this->external_parsers[$extension] = &t3lib_div::getUserObj($_objRef);

					// Drop the entry again if no object could be created or if the parser's init returns false:
				if (!is_object($this->external_parsers[$extension]) || !$this->external_parsers[$extension]->softInit($extension))	{
					unset($this->external_parsers[$extension]);
				}
			}
		}

			// Initialize indexer if we need it (metaphone display does...)
		$this->indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer');

			// Set CSS styles specific for this document:
		$this->pObj->content = str_replace('/*###POSTCSSMARKER###*/',' TABLE.c-list TR TD { white-space: nowrap; vertical-align: top; } ',$this->pObj->content);

		$theOutput = '';

			// Check if details for a phash record should be shown:
		if (t3lib_div::_GET('phash'))	{

				// Show title / function menu:
			$theOutput.=$this->pObj->doc->spacer(5);
			$theOutput.=$this->pObj->doc->section('Details for a single result row:',$this->showDetailsForPhash(t3lib_div::_GET('phash')),0,1);
		} elseif (t3lib_div::_GET('wid'))	{

				// Show title / function menu:
			$theOutput.=$this->pObj->doc->spacer(5);
			$theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1);
		} elseif (t3lib_div::_GET('metaphone'))	{

				// Show title / function menu:
			$theOutput.=$this->pObj->doc->spacer(5);
			$theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1);
		} elseif (t3lib_div::_GET('reindex'))	{

				// Show title / function menu:
			$theOutput.=$this->pObj->doc->spacer(5);
			$theOutput.=$this->pObj->doc->section('Reindexing...',$this->reindexPhash(t3lib_div::_GET('reindex'),t3lib_div::_GET('reindex_id')),0,1);
		} else {	// Detail listings:
				// Type function menu:
			$h_func = t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[type]',$this->pObj->MOD_SETTINGS['type'],$this->pObj->MOD_MENU['type'],'index.php');
			$showPageTable = t3lib_div::inList('0,1,2',$this->pObj->MOD_SETTINGS['type']);

				// The depth menu only makes sense for the page table listings:
			if ($showPageTable)	{
				$h_func.= t3lib_BEfunc::getFuncMenu($this->pObj->id,'SET[depth]',$this->pObj->MOD_SETTINGS['depth'],$this->pObj->MOD_MENU['depth'],'index.php');
			}

				// Show title / function menu:
			$theOutput.=$this->pObj->doc->spacer(5);
			$theOutput.=$this->pObj->doc->section($LANG->getLL('title'),$h_func,0,1);

			if ($showPageTable)	{
				$theOutput.=$this->drawTableOfIndexedPages();
			} else {
				$theOutput.=$this->extraIndexing();
			}
		}

		return $theOutput;
	}











	/*******************************
	 *
	 * Drawing table of indexed pages
	 *
	 ******************************/

	/**
	 * Builds the page tree from the current page (down to the selected depth)
	 * and renders the table of index rows found for each page in it.
	 *
	 * @return	string		HTML section with the table, or a notice if nothing was rendered
	 */
	function drawTableOfIndexedPages()	{
		global $BACK_PATH;

			// Drawing tree:
		$tree = t3lib_div::makeInstance('t3lib_pageTree');
		$perms_clause = $GLOBALS['BE_USER']->getPagePermsClause(1);
		$tree->init('AND '.$perms_clause);

			// The current page is always the first tree element:
		$HTML = '<img src="'.$BACK_PATH.t3lib_iconWorks::getIcon('pages',$this->pObj->pageinfo).'" width="18" height="16" align="top" alt="" />';
		$tree->tree[] = Array(
			'row' => $this->pObj->pageinfo,
			'HTML' => $HTML
		);

		if ($this->pObj->MOD_SETTINGS['depth'])	{
			$tree->getTree($this->pObj->id, $this->pObj->MOD_SETTINGS['depth'], '');
		}

			// Traverse page tree:
		$code = '';
		foreach($tree->tree as $data)	{
			$code.= $this->indexed_info(
				$data['row'],
				$data['HTML'].
					$this->showPageDetails(t3lib_div::fixed_lgd($data['row']['title'], 20),$data['row']['uid'])
			);
		}

		$theOutput = '';
		if ($code)	{
				// The header must be rendered AFTER the loop above: its "clear all" link
				// uses $this->allPhashListed, which indexed_info() fills while traversing.
			$code = '<br/><br/> <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
						$this->printPhashRowHeader().
						$code.
						'</table>';

				// Create section to output:
			$theOutput.=$this->pObj->doc->section('',$code,0,1);
		} else {
			$theOutput.=$this->pObj->doc->section('','<br/><br/>'.$this->pObj->doc->icons(1).'There were no indexed pages found in the tree.<br/><br/>',0,1);
		}

		return $theOutput;
	}

	/**
	 * Renders the table rows with the index information of a single page.
	 * All phash values that get listed are also collected in $this->allPhashListed.
	 *
	 * @param	array		Page record; only the "uid" field is read here
	 * @param	string		HTML content for the first column (page icon/title)
	 * @return	string		HTML table rows (<tr>...)
	 */
	function indexed_info($data, $firstColContent)	{

			// Query (one row more than the limit is selected in order to detect that there are more):
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
					'ISEC.*, IP.*, count(*) AS count_val',
					'index_phash IP, index_section ISEC',
					'IP.phash = ISEC.phash AND ISEC.page_id = '.intval($data['uid']),
					'IP.phash,IP.phash_grouping,IP.cHashParams,IP.data_filename,IP.data_page_id,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.externalUrl,IP.recordUid,IP.freeIndexUid',
					'IP.item_type, IP.tstamp',
					($this->maxListPerPage+1)
				);

			// Initialize variables:
		$rowCount = 0;
		$lines = array();		// Collecting HTML rows here.
		$phashAcc = array();	// Collecting phash values (to remove local indexing for)
		$phashAcc[] = 0;		// Dummy value which keeps the "NOT IN (...)" list below from ever being empty.

			// Traverse the result set of phash rows selected:
		while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
			if ($rowCount == $this->maxListPerPage)	{
				$rowCount++;	// Increase so the extra warning row will appear as well.
				break;
			}

				// Adds a display row; the second argument tells whether the phash_grouping was seen before:
			$lines[$row['phash_grouping']][] = $this->printPhashRow(
													$row,
													isset($lines[$row['phash_grouping']]),
													$this->getGrListEntriesForPhash($row['phash'], $row['gr_list'])
												);
			$rowCount++;
			$phashAcc[] = $row['phash'];
			$this->allPhashListed[] = $row['phash'];	// For removing all shown phash rows.
		}

			// Compile rows into the table:
		$out = '';
		if (count($lines))	{
			$firstColContent = '<td rowspan="'.$rowCount.'">'.$firstColContent.'</td>';
			foreach($lines as $rowSet)	{
				foreach($rowSet as $rows)	{
					$out.=' <tr class="bgColor-20">'.$firstColContent.implode('',$rows).'</tr>';

						// The first column spans all rows, so it is only output once:
					$firstColContent = '';
				}
			}

			if ($rowCount > $this->maxListPerPage)	{	// Now checking greater than, because we increased $rowCount before...
				$out.=' <tr class="bgColor-20"> '.
					'<td> </td> '.
					'<td colspan="'.($this->returnNumberOfColumns()-1).'">'.$this->pObj->doc->icons(3).'<span class="">There were more than '.$this->maxListPerPage.' rows. <a href="'.htmlspecialchars('index.php?id='.$this->pObj->id.'&listALL=1').'">Click here to list them ALL!</a></span></td> '.
					'</tr>';
			}
		} else {
			$out.=' <tr class="bgColor-20"> '.
				'<td>'.$firstColContent.'</td> '.
				'<td colspan="'.($this->returnNumberOfColumns()-1).'"><em>Not indexed</em></td> '.
				'</tr>';
		}

			// Checking for phash-rows which are NOT joined with the section table:
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('IP.*', 'index_phash IP', 'IP.data_page_id = '.intval($data['uid']).' AND IP.phash NOT IN ('.implode(',',$phashAcc).')');
		while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
			$out.=' <tr class="typo3-red"> '.
				'<td colspan="'.$this->returnNumberOfColumns().'"><b>Warning:</b> phash-row "'.$row['phash'].'" didn\'t have a representation in the index_section table!</td> '.
				'</tr>';
			$this->allPhashListed[] = $row['phash'];
		}

		return $out;
	}

	/**
	 * Renders the table cells for one phash row. Which cells are created
	 * depends on the "type" module setting (1: technical details, 2: words and
	 * content, everything else: overview).
	 *
	 * @param	array		Joined index_phash / index_section row incl. "count_val"
	 * @param	boolean		If set, the icon cell is left empty (the row belongs to a phash_grouping already displayed)
	 * @param	mixed		Extra gr_list entries, passed on to printExtraGrListRows()
	 * @return	array		HTML table cells (<td>...), one per array element
	 */
	function printPhashRow($row,$grouping=0,$extraGrListRows)	{
		$lines = array();

			// Title cell attributes will highlight TYPO3 pages with a slightly darker color (bgColor4) than attached medias. Also IF there are more than one section record for a phash row it will be red as a warning that something is wrong!
		$titleCellAttribs = $row['count_val']!=1?' bgcolor="red"':($row['item_type']==='0' ? ' class="bgColor4"' : '');

			// cHash parameters; unserialize() returns FALSE for empty/broken data, which is normalized to an empty array:
		$cHashParams = unserialize($row['cHashParams']);
		if (!is_array($cHashParams))	{
			$cHashParams = array();
		}
		$pageKey = isset($cHashParams['key']) ? $cHashParams['key'] : '';

			// For non-page items a "key" (page number inside the document) is appended to the title:
		$page = ($row['item_type'] && $pageKey) ? ' ['.$pageKey.']' : '';
		$elTitle = $this->linkDetails($row['item_title'] ? htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($row['item_title']), 20).$page) : '<em>[No Title]</em>',$row['phash']);
		$cmdLinks = $this->printRemoveIndexed($row['phash'],'Clear phash-row').$this->printReindex($row,'Re-index element');

			// The first three cells are the same for all display types.
			// Display icon:
		if (!$grouping)	{
			$lines[] = '<td>'.$this->makeItemTypeIcon($row['item_type'], $row['data_filename'] ? $row['data_filename'] : $row['item_title']).'</td>';
		} else {
			$lines[] = '<td> </td>';
		}

			// Title displayed:
		$lines[] = '<td'.$titleCellAttribs.'>'.$elTitle.'</td>';

			// Remove-indexing-link:
		$lines[] = '<td>'.$cmdLinks.'</td>';

		switch($this->pObj->MOD_SETTINGS['type'])	{
			case 1:	// Technical details:
					// Various data:
				$lines[] = '<td>'.$row['phash'].'</td>';
				$lines[] = '<td>'.$row['contentHash'].'</td>';

				if ($row['item_type']==='0')	{
					$lines[] = '<td>'.($row['data_page_id'] ? $row['data_page_id'] : ' ').'</td>';
					$lines[] = '<td>'.($row['data_page_type'] ? $row['data_page_type'] : ' ').'</td>';
					$lines[] = '<td>'.($row['sys_language_uid'] ? $row['sys_language_uid'] : ' ').'</td>';
					$lines[] = '<td>'.($row['data_page_mp'] ? htmlspecialchars($row['data_page_mp']) : ' ').'</td>';
				} else {
					$lines[] = '<td colspan="4">'.htmlspecialchars($row['data_filename']).'</td>';
				}
				$lines[] = '<td>'.htmlspecialchars($row['gr_list']).$this->printExtraGrListRows($extraGrListRows).'</td>';
				$lines[] = '<td>'.$this->printRootlineInfo($row).'</td>';
				$lines[] = '<td>'.($row['page_id'] ? $row['page_id'] : ' ').'</td>';
				$lines[] = '<td>'.($row['phash_t3']!=$row['phash'] ? $row['phash_t3'] : ' ').'</td>';
				$lines[] = '<td>'.($row['freeIndexUid'] ? $row['freeIndexUid'] : ' ').'</td>';
				$lines[] = '<td>'.($row['recordUid'] ? $row['recordUid'] : ' ').'</td>';

					// cHash parameters: the cHash itself is shown in its own column and therefore removed from the parameter list:
				$theCHash = isset($cHashParams['cHash']) ? $cHashParams['cHash'] : '';
				unset($cHashParams['cHash']);

				if ($row['item_type'])	{	// pdf...
					$lines[] = '<td>'.($pageKey ? 'Page '.htmlspecialchars($pageKey) : '').' </td>';
				} elseif ($row['item_type']==0)	{
					$lines[] = '<td>'.htmlspecialchars(t3lib_div::implodeArrayForUrl('',$cHashParams)).' </td>';
				} else {
					$lines[] = '<td class="bgColor"> </td>';
				}

				$lines[] = '<td>'.htmlspecialchars($theCHash).'</td>';
			break;
			case 2:	// Words and content:
					// Query:
				$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
							'*',
							'index_fulltext',
							'phash = '.intval($row['phash'])
						);
					// There may be no fulltext record for the phash row:
				$fulltextData = (is_array($ftrows) && isset($ftrows[0])) ? $ftrows[0]['fulltextdata'] : '';
				$lines[] = '<td style="white-space: normal;">'.
							htmlspecialchars(t3lib_div::fixed_lgd_cs($this->utf8_to_currentCharset($fulltextData),3000)).
							'<hr/><em>Size: '.strlen($fulltextData).'</em>'.
							'</td>';

					// Query (the result array is indexed by "baseword"):
				$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
							'index_words.baseword, index_rel.*',
							'index_rel, index_words',
							'index_rel.phash = '.intval($row['phash']).
								' AND index_words.wid = index_rel.wid',
							'',
							'',
							'',
							'baseword'
						);

				$wordList = '';
				if (is_array($ftrows))	{
					$indexed_words = array_keys($ftrows);
					sort($indexed_words);
					$wordList = htmlspecialchars($this->utf8_to_currentCharset(implode(' ',$indexed_words)));
					$wordList.='<hr/><em>Count: '.count($indexed_words).'</em>';
				}

				$lines[] = '<td style="white-space: normal;">'.$wordList.'</td>';
			break;
			default:	// Overview
				$lines[] = '<td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset($row['item_description'])).'...</td>';
				$lines[] = '<td>'.t3lib_div::formatSize($row['item_size']).'</td>';
				$lines[] = '<td>'.t3lib_BEfunc::dateTimeAge($row['tstamp']).'</td>';
			break;
		}

		return $lines;
	}

	/**
	 * Renders the header row matching the cells created by printPhashRow().
	 * Must be called after the rows were rendered, because the "clear all" link
	 * is built from the phash values collected in $this->allPhashListed.
	 *
	 * @return	string		HTML table header row
	 */
	function printPhashRowHeader()	{
		$lines = array();

			// The first four cells are the same for all display types:
		$lines[] = '<td> </td>';
		$lines[] = '<td> </td>';
		$lines[] = '<td>Title</td>';
		$lines[] = '<td bgcolor="red">'.$this->printRemoveIndexed(implode(',',$this->allPhashListed),'Clear ALL phash-rows below!').'</td>';

		switch($this->pObj->MOD_SETTINGS['type'])	{
			case 1:
				$lines[] = '<td>pHash</td>';
				$lines[] = '<td>cHash</td>';
				$lines[] = '<td>&id</td>';
				$lines[] = '<td>&type</td>';
				$lines[] = '<td>&L</td>';
				$lines[] = '<td>&MP</td>';
				$lines[] = '<td>grlist</td>';
				$lines[] = '<td>Rootline</td>';
				$lines[] = '<td>page_id</td>';
				$lines[] = '<td>phash_t3</td>';
				$lines[] = '<td>CfgUid</td>';
				$lines[] = '<td>RecUid</td>';
				$lines[] = '<td>GET-parameters</td>';
				$lines[] = '<td>&cHash</td>';
			break;
			case 2:
				$lines[] = '<td>Content<br/> <img src="clear.gif" width="300" height="1" alt="" /></td>';
				$lines[] = '<td>Words<br/> <img src="clear.gif" width="300" height="1" alt="" /></td>';
			break;
			default:
				$lines[] = '<td>Description</td>';
				$lines[] = '<td>Size</td>';
				$lines[] = '<td>Indexed:</td>';
			break;
		}

		$out = '<tr class="tableheader bgColor5">'.implode('',$lines).'</tr>';
		return $out;
	}

	/**
	 * Returns the number of table columns for the current "type" module setting.
	 * The numbers correspond to the cells created in printPhashRowHeader().
	 *
	 * @return	integer		Number of columns
	 */
	function returnNumberOfColumns()	{
		switch($this->pObj->MOD_SETTINGS['type'])	{
			case 1:
				return 18;
			case 2:
				return 6;
			default:
				return 7;
		}
	}











	/*******************************
	 *
	 * Details display, phash row
	 *
	 *******************************/

	/**
	 * Renders the details of a single phash row: the row itself, debug
	 * information (if any), word statistics and the section records.
	 *
	 * @param	integer		phash value of the row to display
	 * @return	string		HTML content
	 */
	function showDetailsForPhash($phash)	{

		$content = '';

			// Selects the result row:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'*',
					'index_phash',
					'phash = '.intval($phash)
				);
		$phashRecord = (is_array($ftrows) && isset($ftrows[0])) ? $ftrows[0] : NULL;

			// If found, display:
		if (is_array($phashRecord))	{
			$content.= '<h4>phash row content:</h4>'.
						$this->utf8_to_currentCharset(t3lib_div::view_array($phashRecord));

				// Getting debug information if any:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'*',
						'index_debug',
						'phash = '.intval($phash)
					);
			$debugInfo = (is_array($ftrows) && isset($ftrows[0])) ? unserialize($ftrows[0]['debuginfo']) : FALSE;
			if (is_array($debugInfo))	{
				$lexer = isset($debugInfo['lexer']) ? $debugInfo['lexer'] : '';
				unset($debugInfo['lexer']);

				$content.= '<h3>Debug information:</h3>'.
						$this->utf8_to_currentCharset(t3lib_div::view_array($debugInfo));

					// NOTE(review): the lexer value is output without htmlspecialchars() - confirm that it is meant to carry markup.
				$content.= '<h4>Debug information / lexer splitting:</h4>'.
						'<hr/><b>'.
						$this->utf8_to_currentCharset($lexer).
						'</b><hr/>';
			}



			$content.='<h3>Word statistics</h3>';

				// Finding all words for this phash:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'index_words.*, index_rel.*',
						'index_rel, index_words',
						'index_rel.phash = '.intval($phash).
							' AND index_words.wid = index_rel.wid',
						'',
						'index_words.baseword',
						''
					);
			if (!is_array($ftrows))	{
				$ftrows = array();
			}
			$pageRec = t3lib_BEfunc::getRecord('pages', $phashRecord['data_page_id']);
			$showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin();
			$content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec);

				// Group metaphone hash:
			$metaphone = array();
			foreach($ftrows as $row)	{
				$metaphone[$row['metaphone']][] = $row['baseword'];
			}
			$content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:');

				// Finding top-20 on frequency for this phash:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'index_words.baseword, index_words.metaphone, index_rel.*',
						'index_rel, index_words',
						'index_rel.phash = '.intval($phash).
							' AND index_words.wid = index_rel.wid'.
							' AND index_words.is_stopword=0',
						'',
						'index_rel.freq DESC',
						'20'
					);
			$content.= $this->listWords($ftrows, 'Top-20 words by frequency:', 2);

				// Finding top-20 on count for this phash:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'index_words.baseword, index_words.metaphone, index_rel.*',
						'index_rel, index_words',
						'index_rel.phash = '.intval($phash).
							' AND index_words.wid = index_rel.wid'.
							' AND index_words.is_stopword=0',
						'',
						'index_rel.count DESC',
						'20'
					);
			$content.= $this->listWords($ftrows, 'Top-20 words by count:', 2);


			$content.='<h3>Section records for this phash</h3>';

				// Finding sections for this record:
			$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
						'*',
						'index_section',
						'index_section.phash = '.intval($phash),
						'',
						'',
						''
					);
			$content.= t3lib_div::view_array($ftrows);

				// Add go-back link:
			$content = $this->linkList().$content.$this->linkList();

		} else $content.= 'Error: No phash row found';

		return $content;
	}

	/**
	 * Renders a table of words with count/first/frequency/flags columns.
	 * Optionally a stop-word checkbox column and (if a page record is given)
	 * a page-keyword checkbox column are added, together with their submit buttons.
	 *
	 * @param	array		Word rows (index_words / index_rel fields)
	 * @param	string		Header text, output above the table
	 * @param	mixed		If true, stop-word checkboxes are shown. With the value 2 the hidden "0" fallback field is left out.
	 * @param	mixed		Page record (array) for the keyword column; any non-array value disables that column
	 * @return	string		HTML content
	 */
	function listWords($ftrows,$header, $stopWordBoxes=FALSE, $page='')	{

		if (!is_array($ftrows))	{
			$ftrows = array();
		}

			// Prepare keywords:
		$keywords = is_array($page) ? array_flip(t3lib_div::trimExplode(',',$page['keywords'], 1)) : '';
		$showKeywords = is_array($keywords);

			// Render list:
		$trows = '';
		$trows.= ' <tr class="tableheader bgColor5"> '.
					($stopWordBoxes ? '<td>Stopword:</td>' : '').
					' <td>Word:</td>'.
					' <td>Count:</td>'.
					' <td>First:</td>'.
					' <td>Frequency:</td>'.
					' <td>Flags:</td> '.
					($showKeywords ? '<td>Page keyword:</td>' : '').
					' </tr> ';
		foreach($ftrows as $row)	{
				// "is_stopword" is not part of every query feeding this function, so it may be missing:
			$isStopWord = !empty($row['is_stopword']);

				// Stop-word checkbox cell:
			$stopWordCell = '';
			if ($stopWordBoxes)	{
				$hiddenField = $stopWordBoxes!=2 ? '<input type="hidden" name="stopWord['.$row['wid'].']" value="0" />' : '';
				$stopWordCell = '<td align="center"'.($isStopWord ? ' style="background-color:red;"' : '').'>'.
						$hiddenField.
						'<input type="checkbox" name="stopWord['.$row['wid'].']" value="1"'.($isStopWord ? ' checked="checked"' : '').' /></td>';
			}

				// Page keyword checkbox cell. The word is part of the field name and must be escaped for the HTML attribute:
			$keywordCell = '';
			if ($showKeywords)	{
				$isKeyword = isset($keywords[$row['baseword']]);
				$fieldName = htmlspecialchars('pageKeyword['.$row['baseword'].']');
				$keywordCell = '<td align="center"'.($isKeyword ? ' class="bgColor2"' : '').'>'.
						'<input type="hidden" name="'.$fieldName.'" value="0" />'.
						'<input type="checkbox" name="'.$fieldName.'" value="1"'.($isKeyword ? ' checked="checked"' : '').' /></td>';
			}

			$trows.= ' <tr class="'.($isStopWord ? 'bgColor' : 'bgColor4').'"> '.
						$stopWordCell.
						' <td>'.$this->linkWordDetails(htmlspecialchars($this->utf8_to_currentCharset($row['baseword'])), $row['wid']).'</td>'.
						' <td>'.htmlspecialchars($row['count']).'</td>'.
						' <td>'.htmlspecialchars($row['first']).'</td>'.
						' <td>'.htmlspecialchars($row['freq']).'</td>'.
						' <td>'.htmlspecialchars($this->flagsMsg($row['flags'])).'</td> '.
						$keywordCell.
						' </tr> ';
		}

		return '<h4>'.htmlspecialchars($header).'</h4>'.
					' <table border="0" cellspacing="1" cellpadding="2" class="c-list"> '.
					$trows.
					' </table>'.
					($stopWordBoxes ? '<input type="submit" value="Change stop-word settings" name="_stopwords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" />' : '').
					($showKeywords ? '<input type="submit" value="Set page keywords" name="_pageKeywords" onclick="document.webinfoForm.action=\''.htmlspecialchars(t3lib_div::getIndpEnv('REQUEST_URI')).'\';" /><input type="hidden" name="pageKeyword_pageUid" value="'.$page['uid'].'" />'.
								'<br/>Current keywords are: <em>'.htmlspecialchars(implode(', ',array_keys($keywords))).'</em>' : '');
	}

	/**
	 * Renders a table of metaphone values which are shared by more than one word.
	 *
	 * @param	array		Word lists, with the metaphone hash as key and an array of words as value
	 * @param	string		Header text, output above the table
	 * @return	string		HTML content
	 */
	function listMetaphoneStat($ftrows,$header)	{

		$trows = '';
		$trows.= ' <tr class="tableheader bgColor5">'.
					' <td>Metaphone:</td>'.
					' <td>Hash:</td>'.
					' <td>Count:</td>'.
					' <td>Words:</td>'.
					' </tr> ';
		foreach($ftrows as $metaphone => $words)	{
				// Only values with more than one word are listed:
			if (count($words)>1)	{
				$trows.= ' <tr class="bgColor4">'.
							' <td>'.$this->linkMetaPhoneDetails($this->indexerObj->metaphone($words[0],1),$metaphone).'</td>'.
							' <td>'.htmlspecialchars($metaphone).'</td>'.
							' <td>'.htmlspecialchars(count($words)).'</td>'.
							' <td style="white-space: normal;">'.htmlspecialchars($this->utf8_to_currentCharset(implode(', ',$words))).'</td>'.
							' </tr> ';
			}
		}

		return '<h4>'.htmlspecialchars($header).'</h4>'.
					'<table border="0" cellspacing="1" cellpadding="2" class="c-list"> '.
					$trows.
					' </table>';
	}

	/**
	 * Wraps a string in a link to the word details view of this script.
	 *
	 * @param	string		String to wrap; output as is, so it must be HTML-safe already
	 * @param	integer		Word id, set as the "wid" parameter
	 * @return	string		<a>-tag wrapped string
	 */
	function linkWordDetails($string,$wid)	{
		return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('wid'=>$wid,'phash'=>''))).'">'.$string.'</a>';
	}


	/**
	 * Wraps a string in a link to the metaphone details view of this script.
	 *
	 * @param	string		String to wrap; output as is, so it must be HTML-safe already
	 * @param	integer		Metaphone value, set as the "metaphone" parameter
	 * @return	string		<a>-tag wrapped string
	 */
	function linkMetaPhoneDetails($string,$metaphone)	{
		return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('metaphone'=>$metaphone,'wid'=>'','phash'=>''))).'">'.$string.'</a>';
	}

	/**
	 * Creates a readable message for a flags value: names for the bits
	 * 128, 64 and 32 followed by the numeric value in parentheses.
	 *
	 * @param	integer		Flags value
	 * @return	string		Message (not HTML-escaped); empty string if the value is not greater than zero
	 */
	function flagsMsg($flags)	{
		if ($flags > 0)	{
			return
				($flags & 128 ? '<title>' : '').			// pow(2,7)
				($flags & 64 ? '<meta/keywords>' : '').		// pow(2,6)
				($flags & 32 ? '<meta/description>' : '').	// pow(2,5)
				' ('.$flags.')';
		}

		return '';
	}










	/*******************************
	 *
	 * Details display, words / metaphone
	 *
	 *******************************/

	/**
	 * Renders a table of all phash rows in which a word occurs,
	 * ordered by descending frequency.
	 *
	 * @param	integer		Word id (index_rel.wid)
	 * @return	string		HTML content
	 */
	function showDetailsForWord($wid)	{

			// Select references to this word
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'index_phash.*, index_section.*, index_rel.*',
					'index_rel, index_section, index_phash',
					'index_rel.wid = '.intval($wid).
						' AND index_rel.phash = index_section.phash'.
						' AND index_section.phash = index_phash.phash',
					'',
					'index_rel.freq DESC',
					''
				);

			// Headers:
		$content = '';
		$content.=' <tr class="tableheader bgColor5">'.
				' <td>phash</td>'.
				' <td>page_id</td>'.
				' <td>data_filename</td>'.
				' <td>count</td>'.
				' <td>first</td>'.
				' <td>freq</td>'.
				' <td>flags</td>'.
				' </tr>';

		if (is_array($ftrows))	{
			foreach($ftrows as $wDat)	{
				$content.=' <tr class="bgColor4">'.
					' <td>'.$this->linkDetails(htmlspecialchars($wDat['phash']),$wDat['phash']).'</td>'.
					' <td>'.htmlspecialchars($wDat['page_id']).'</td>'.
					' <td>'.htmlspecialchars($wDat['data_filename']).'</td>'.
					' <td>'.htmlspecialchars($wDat['count']).'</td>'.
					' <td>'.htmlspecialchars($wDat['first']).'</td>'.
					' <td>'.htmlspecialchars($wDat['freq']).'</td>'.
					' <td>'.htmlspecialchars($wDat['flags']).'</td>'.
					' </tr>';
			}
		}

			// Compile table:
		$content = ' <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
					$content.
					' </table>';

			// Add go-back link:
		$content = $content.$this->linkList();

		return $content;
	}

	/**
	 * Renders a table of all words sharing a metaphone value.
	 *
	 * @param	integer		Metaphone value (index_words.metaphone)
	 * @return	string		HTML content; empty string if no word was found
	 */
	function showDetailsForMetaphone($metaphone)	{

			// Finding all words with this metaphone value:
		$ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
					'index_words.*',
					'index_words',
					'index_words.metaphone = '.intval($metaphone),
					'',
					'index_words.baseword',
					''
				);

		$content = '';
		if (is_array($ftrows) && count($ftrows))	{

				// The heading is kept outside of the table markup:
			$heading = '<h4>Metaphone: '.$this->indexerObj->metaphone($ftrows[0]['baseword'],1).'</h4>';

			$tableRows = ' <tr class="tableheader bgColor5">'.
					' <td>Word</td>'.
					' <td>Is stopword?</td>'.
					' </tr>';

			foreach($ftrows as $wDat)	{
				$tableRows.=' <tr class="bgColor4">'.
					' <td>'.$this->linkWordDetails(htmlspecialchars($wDat['baseword']),$wDat['wid']).'</td>'.
					' <td>'.htmlspecialchars($wDat['is_stopword'] ? 'YES' : 'No').'</td>'.
					' </tr>';
			}

			$content = $heading.
						' <table border="0" cellspacing="1" cellpadding="2" class="c-list">'.
						$tableRows.
						' </table>';

				// Cross-check: the hash calculated from the first word must match the requested value:
			if ($this->indexerObj->metaphone($ftrows[0]['baseword'])!=$metaphone)	{
				$content.='ERROR: Metaphone string and hash did not match for some reason!?';
			}

				// Add go-back link:
			$content = $content.$this->linkList();
		}

		return $content;
	}












	/*******************************
	 *
	 * Helper functions
	 *
	 *******************************/

	function printRemoveIndexed($phash,$alt)	{
		return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('deletePhash'=>$phash))).'">'.
				'<img src="'.$GLOBALS['BACK_PATH'].'gfx/garbage.gif" width="11" hspace="1" vspace="2" height="12" border="0" title="'.htmlspecialchars($alt).'" alt="" />'.
01022 '</a>'; 01023 } 01024 01032 function printReindex($resultRow,$alt) { 01033 if ($resultRow['item_type'] && $resultRow['item_type']!=='0') { 01034 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('reindex'=>$resultRow['phash'],'reindex_id'=>$resultRow['page_id']))).'">'. 01035 '<img src="'.$GLOBALS['BACK_PATH'].'gfx/refresh_n.gif" width="14" hspace="1" vspace="2" height="14" border="0" title="'.htmlspecialchars($alt).'" alt="" />'. 01036 '</a>'; 01037 } 01038 } 01039 01047 function linkDetails($string,$phash) { 01048 return '<a href="'.htmlspecialchars(t3lib_div::linkThisScript(array('phash'=>$phash))).'">'.$string.'</a>'; 01049 } 01050 01056 function linkList() { 01057 return '<br/><a href="index.php?id='.$this->pObj->id.'">Back to list.</a><br/>'; 01058 } 01059 01067 function showPageDetails($string,$id) { 01068 return '<a href="'.htmlspecialchars('index.php?id='.$id.'&SET[depth]=0&SET[type]=1').'">'.$string.'</a>'; 01069 } 01070 01077 function printExtraGrListRows($extraGrListRows) { 01078 if (count($extraGrListRows)) { 01079 reset($extraGrListRows); 01080 $lines=array(); 01081 while(list(,$r)=each($extraGrListRows)) { 01082 $lines[] = $r['gr_list']; 01083 } 01084 return "<br/>".$GLOBALS['TBE_TEMPLATE']->dfw(implode('<br/>',$lines)); 01085 } 01086 } 01087 01094 function printRootlineInfo($row) { 01095 $uidCollection = array(); 01096 01097 if ($row['rl0']) { 01098 $uidCollection[0] = $row['rl0']; 01099 if ($row['rl1']) { 01100 $uidCollection[1] = $row['rl1']; 01101 if ($row['rl2']) { 01102 $uidCollection[2] = $row['rl2']; 01103 01104 // Additional levels: 01105 if (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) { 01106 foreach($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) { 01107 if ($row[$fieldName]) { 01108 $uidCollection[$rootLineLevel] = $row[$fieldName]; 01109 } 01110 } 01111 } 01112 } 01113 } 01114 } 01115 01116 // Return root 
line. 01117 ksort($uidCollection); 01118 return implode('/',$uidCollection); 01119 } 01120 01128 function makeItemTypeIcon($it,$alt='') { 01129 if (!isset($this->iconFileNameCache[$it])) { 01130 if ($it==='0') { 01131 $icon = 'EXT:indexed_search/pi/res/pages.gif'; 01132 } elseif ($this->external_parsers[$it]) { 01133 $icon = $this->external_parsers[$it]->getIcon($it); 01134 } 01135 01136 $fullPath = t3lib_div::getFileAbsFileName($icon); 01137 01138 if ($fullPath) { 01139 $info = @getimagesize($fullPath); 01140 $iconPath = $GLOBALS['BACK_PATH'].'../'.substr($fullPath,strlen(PATH_site)); 01141 $this->iconFileNameCache[$it] = is_array($info) ? '<img src="'.$iconPath.'" '.$info[3].' title="###TITLE_ATTRIBUTE###" alt="" />' : ''; 01142 } 01143 } 01144 return str_replace('###TITLE_ATTRIBUTE###',htmlspecialchars($it.': '.$alt),$this->iconFileNameCache[$it]); 01145 } 01146 01153 function utf8_to_currentCharset($string) { 01154 global $LANG; 01155 if ($LANG->charSet != 'utf-8') { 01156 $string = $LANG->csConvObj->utf8_decode($string, $LANG->charSet, TRUE); 01157 } 01158 return $string; 01159 } 01160 01161 01162 01163 01164 01165 01166 01167 01168 01169 01170 01171 01172 /******************************** 01173 * 01174 * Reindexing 01175 * 01176 *******************************/ 01177 01185 function reindexPhash($phash, $pageId) { 01186 01187 // Query: 01188 list($resultRow) = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 01189 'ISEC.*, IP.*', 01190 'index_phash IP, index_section ISEC', 01191 'IP.phash = ISEC.phash 01192 AND IP.phash = '.intval($phash).' 01193 AND ISEC.page_id = '.intval($pageId) 01194 ); 01195 01196 $content = ''; 01197 if (is_array($resultRow)) { 01198 if ($resultRow['item_type'] && $resultRow['item_type']!=='0') { 01199 01200 // (Re)-Indexing file on page. 
01201 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 01202 $indexerObj->backend_initIndexer($pageId, 0, 0, '', $this->getUidRootLineForClosestTemplate($pageId)); 01203 01204 // URL or local file: 01205 if ($resultRow['externalUrl']) { 01206 $indexerObj->indexExternalUrl($resultRow['data_filename']); 01207 } else { 01208 $indexerObj->indexRegularDocument($resultRow['data_filename'], TRUE); 01209 } 01210 01211 if ($indexerObj->file_phash_arr['phash'] != $resultRow['phash']) { 01212 $content.= 'ERROR: phash ('.$indexerObj->file_phash_arr['phash'].') did NOT match '.$resultRow['phash'].' for strange reasons!'; 01213 } 01214 01215 $content.='<h4>Log for re-indexing of "'.htmlspecialchars($resultRow['data_filename']).'":</h4>'; 01216 $content.=t3lib_div::view_array($indexerObj->internal_log); 01217 01218 $content.='<h4>Hash-array, page:</h4>'; 01219 $content.=t3lib_div::view_array($indexerObj->hash); 01220 01221 $content.='<h4>Hash-array, file:</h4>'; 01222 $content.=t3lib_div::view_array($indexerObj->file_phash_arr); 01223 } 01224 } 01225 01226 // Link back to list. 01227 $content.= $this->linkList(); 01228 01229 return $content; 01230 } 01231 01239 function getUidRootLineForClosestTemplate($id) { 01240 $tmpl = t3lib_div::makeInstance("t3lib_tsparser_ext"); // Defined global here! 01241 $tmpl->tt_track = 0; // Do not log time-performance information 01242 $tmpl->init(); 01243 01244 // Gets the rootLine 01245 $sys_page = t3lib_div::makeInstance("t3lib_pageSelect"); 01246 $rootLine = $sys_page->getRootLine($id); 01247 $tmpl->runThroughTemplates($rootLine,0); // This generates the constants/config + hierarchy info for the template. 
01248 01249 // Root line uids 01250 $rootline_uids = array(); 01251 foreach($tmpl->rootLine as $rlkey => $rldat) { 01252 $rootline_uids[$rlkey] = $rldat['uid']; 01253 } 01254 01255 return $rootline_uids; 01256 } 01257 01258 01259 01260 01261 01262 01263 01264 01265 01266 01267 01268 01269 /******************************** 01270 * 01271 * Indexing of configurations 01272 * 01273 *******************************/ 01274 01280 function extraIndexing() { 01281 01282 // Select index configurations on this page 01283 $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 01284 '*', 01285 'index_config', 01286 'pid = '.intval($this->pObj->id). 01287 ' AND hidden=0'. 01288 ' AND starttime<'.time() 01289 ); 01290 01291 01292 $rl = $this->getUidRootLineForClosestTemplate($this->pObj->id); 01293 01294 foreach($ftrows as $cfgRow) { 01295 switch($cfgRow['type']) { 01296 case 1: 01297 if ($cfgRow['table2index'] && isset($GLOBALS['TCA'][$cfgRow['table2index']])) { 01298 01299 // Init: 01300 $pid = intval($cfgRow['alternative_source_pid']) ? intval($cfgRow['alternative_source_pid']) : $this->pObj->id; 01301 $fieldList = t3lib_div::trimExplode(',',$cfgRow['fieldlist'],1); 01302 01303 // Select 01304 $recs = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( 01305 '*', 01306 $cfgRow['table2index'], 01307 'pid = '.intval($pid) 01308 ); 01309 01310 // Traverse: 01311 foreach($recs as $r) { 01312 // (Re)-Indexing a row from a table: 01313 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 01314 parse_str(str_replace('###UID###',$r['uid'],$cfgRow['get_params']),$GETparams); 01315 $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl, $GETparams, $cfgRow['chashcalc'] ? TRUE : FALSE); 01316 $indexerObj->backend_setFreeIndexUid($cfgRow['uid']); 01317 01318 $theContent = ''; 01319 foreach($fieldList as $k => $v) { 01320 if (!$k) { 01321 $theTitle = $r[$v]; 01322 } else { 01323 $theContent.= $r[$v].' 
'; 01324 } 01325 } 01326 #debug($theContent,$theTitle); 01327 $indexerObj->backend_indexAsTYPO3Page( 01328 $theTitle, 01329 '', 01330 '', 01331 $theContent, 01332 $GLOBALS['LANG']->charSet, 01333 $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['tstamp']], 01334 $r[$GLOBALS['TCA'][$cfgRow['table2index']]['ctrl']['crdate']], 01335 $r['uid'] 01336 ); 01337 01338 } 01339 #debug($recs); 01340 } 01341 break; 01342 case 2: 01343 $readpath = $cfgRow['filepath']; 01344 if (!t3lib_div::isAbsPath($readPath)) { 01345 $readpath = t3lib_div::getFileAbsFileName($readpath); 01346 } 01347 #debug($readpath,'$readpath'); 01348 01349 if (t3lib_div::isAllowedAbsPath($readpath)) { 01350 $extList = implode(',',t3lib_div::trimExplode(',',$cfgRow['extensions'],1)); 01351 $fileArr = array(); 01352 $files = t3lib_div::getAllFilesAndFoldersInPath($fileArr,$readpath,$extList,0,$cfgRow['depth']); 01353 $files = t3lib_div::removePrefixPathFromList($files,PATH_site); 01354 #debug($files); 01355 foreach($files as $path) { 01356 // (Re)-Indexing file on page. 01357 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 01358 $indexerObj->backend_initIndexer($this->pObj->id, 0, 0, '', $rl); 01359 $indexerObj->backend_setFreeIndexUid($cfgRow['uid']); 01360 $indexerObj->hash['phash'] = -1; // EXPERIMENT - but to avoid phash_t3 being written to file sections (otherwise they are removed when page is reindexed!!!) 
01361 01362 $indexerObj->indexRegularDocument($path, TRUE); 01363 01364 #debug($indexerObj->internal_log,$resultRow['data_filename']); 01365 #debug($indexerObj->file_phash_arr,'file_phash_arr'); 01366 #debug($indexerObj->hash,'hash'); 01367 01368 } 01369 } 01370 break; 01371 case 3: 01372 if ($cfgRow['externalUrl']) { 01373 $this->indexExtUrlRecursively($cfgRow['externalUrl'], $cfgRow['depth'], $this->pObj->id, $rl, $cfgRow['uid']); 01374 } 01375 break; 01376 } 01377 } 01378 } 01379 01391 function indexExtUrlRecursively($url, $depth, $pageId, $rl, $cfgUid) { 01392 01393 // Index external URL: 01394 $indexerObj = &t3lib_div::makeInstance('tx_indexedsearch_indexer'); 01395 $indexerObj->backend_initIndexer($pageId, 0, 0, '', $rl); 01396 $indexerObj->backend_setFreeIndexUid($cfgUid); 01397 01398 $indexerObj->indexExternalUrl($url); 01399 $url_qParts = parse_url($url); 01400 01401 // Recursion: 01402 if ($depth>0) { 01403 $list = $indexerObj->extractHyperLinks($indexerObj->indexExternalUrl_content); 01404 01405 // Traverse links: 01406 foreach($list as $count => $linkInfo) { 01407 01408 // Decode entities: 01409 $linkSource = t3lib_div::htmlspecialchars_decode($linkInfo['href']); 01410 01411 $qParts = parse_url($linkSource); 01412 if (!$qParts['scheme']) { 01413 $linkSource = $url_qParts['scheme'].'://'.$url_qParts['host'].'/'.$linkSource; 01414 } 01415 01416 $this->indexExtUrlRecursively($linkSource, $depth-1, $pageId, $rl, $cfgUid); 01417 01418 // Temporary limit until we know how to handle hundreds of URLs with limited parsetime in PHP... 
01419 if ($count>3) break; 01420 } 01421 } 01422 } 01423 01424 01425 01426 01427 01428 01429 01430 01431 01432 01433 01434 01435 /******************************** 01436 * 01437 * SQL functions 01438 * 01439 *******************************/ 01440 01448 function removeIndexedPhashRow($phashList,$clearPageCache=1) { 01449 $phashRows = t3lib_div::trimExplode(',',$phashList,1); 01450 01451 foreach($phashRows as $phash) { 01452 $phash = intval($phash); 01453 if ($phash>0) { 01454 01455 if ($clearPageCache) { 01456 // Clearing page cache: 01457 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('page_id', 'index_section', 'phash='.intval($phash)); 01458 if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { 01459 $idList = array(); 01460 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01461 $idList[] = $row['page_id']; 01462 } 01463 $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pages', 'page_id IN ('.implode(',',$GLOBALS['TYPO3_DB']->cleanIntArray($idList)).')'); 01464 } 01465 } 01466 01467 // Removing old registrations for all tables. 01468 $tableArr = explode(',','index_phash,index_rel,index_section,index_grlist,index_fulltext,index_debug'); 01469 foreach($tableArr as $table) { 01470 $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); 01471 } 01472 01473 // Did not remove any index_section records for external files where phash_t3 points to this hash! 
01474 } 01475 } 01476 } 01477 01485 function getGrListEntriesForPhash($phash,$gr_list) { 01486 $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_grlist', 'phash='.intval($phash)); 01487 $lines = array(); 01488 $isRemoved = 0; 01489 while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { 01490 if (!$isRemoved && !strcmp($row['gr_list'],$gr_list)) { 01491 $isRemoved = 1; 01492 } else { 01493 $lines[] = $row; 01494 } 01495 } 01496 return $lines; 01497 } 01498 01505 function processStopWords($stopWords) { 01506 01507 if ($GLOBALS['BE_USER']->isAdmin()) { 01508 // Traverse words 01509 foreach($stopWords as $wid => $state) { 01510 $fieldArray = array( 01511 'is_stopword' => $state 01512 ); 01513 $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_words', 'wid='.$wid, $fieldArray); 01514 } 01515 } 01516 } 01517 01525 function processPageKeywords($pageKeywords, $pageUid) { 01526 01527 // Get pages current keywords 01528 $pageRec = t3lib_BEfunc::getRecord('pages', $pageUid); 01529 $keywords = array_flip(t3lib_div::trimExplode(',', $pageRec['keywords'], 1)); 01530 01531 // Merge keywords: 01532 foreach($pageKeywords as $key => $v) { 01533 if ($v) { 01534 $keywords[$key]=1; 01535 } else { 01536 unset($keywords[$key]); 01537 } 01538 } 01539 01540 // Compile new list: 01541 $data = array(); 01542 $data['pages'][$pageUid]['keywords'] = implode(', ',array_keys($keywords)); 01543 01544 $tce = t3lib_div::makeInstance('t3lib_TCEmain'); 01545 $tce->stripslashes_values = 0; 01546 $tce->start($data,array()); 01547 $tce->process_datamap(); 01548 } 01549 } 01550 01551 01552 01553 if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']) { 01554 include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php']); 01555 } 01556 01557 ?>