Documentation TYPO3 par Ameos |
<?php
/***************************************************************
*  Copyright notice
*
*  (c) 1999-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
*  All rights reserved
*
*  This script is part of the TYPO3 project. The TYPO3 project is
*  free software; you can redistribute it and/or modify
*  it under the terms of the GNU General Public License as published by
*  the Free Software Foundation; either version 2 of the License, or
*  (at your option) any later version.
*
*  The GNU General Public License can be found at
*  http://www.gnu.org/copyleft/gpl.html.
*  A copy is found in the textfile GPL.txt and important notices to the license
*  from the author is found in LICENSE.txt distributed with these scripts.
*
*
*  This script is distributed in the hope that it will be useful,
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
*  GNU General Public License for more details.
*
*  This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

/**
 * String-based helpers for splitting, inspecting and cleaning HTML.
 *
 * The class deliberately keeps PHP 4 style declarations ("var", no visibility
 * keywords) so it stays loadable wherever the original was, but it only relies
 * on functions that still exist in current PHP versions (preg_* instead of the
 * removed ereg/split family, foreach instead of each()).
 *
 * Several methods depend on the external class t3lib_div
 * (trimExplode, intInRange, testInt, inList, getIndpEnv, ...).
 */
class t3lib_parsehtml {

    /**
     * Cache for upper-cased comparison lists built by caseShift().
     */
    var $caseShift_cache = array();


    // *******************************************'
    // COPY FROM class.tslib_content.php: / BEGIN
    // substituteSubpart
    // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
    // *******************************************'

    /**
     * Returns the content found between the first two occurrences of $marker.
     * If a marker sits inside an HTML comment, the remainder of that comment
     * (up to "-->" after the first marker, from "<!--" before the second one)
     * is not part of the returned content.
     *
     * @param string Content in which to search
     * @param string Marker string, eg. "###CONTENT_PART###"
     * @return string The subpart, or an empty string if the marker is empty or does not occur twice
     */
    function getSubpart($content, $marker) {
        $markerLen = strlen($marker);
        if (!$markerLen) return '';

        $start = strpos($content, $marker);
        if ($start === false) return '';
        $start += $markerLen;

        $stop = strpos($content, $marker, $start);
        if ($stop === false) return '';    // no closing marker => no subpart

        $sub = (string)substr($content, $start, $stop - $start);

            // Skip the tail of the comment that wraps the first marker
        $reg = array();
        if (preg_match('/^[^<]*-->/', $sub, $reg)) $start += strlen($reg[0]);

            // Cut off the head of the comment that wraps the second marker
        $reg = array();
        if (preg_match('/<!--[^>]*\z/', $sub, $reg)) $stop -= strlen($reg[0]);

        return (string)substr($content, $start, $stop - $start);
    }

    /**
     * Replaces the subpart enclosed by two occurrences of $marker (including
     * the markers and any HTML comments wrapping them) with $subpartContent.
     *
     * @param string Content in which to substitute
     * @param string Marker string, eg. "###CONTENT_PART###"
     * @param mixed  Replacement string. If an array, [0] and [1] are wrapped around the existing subpart content
     * @param boolean If set, all further subparts with the same marker in the remaining content are substituted as well
     * @param boolean If set, the markers (with their wrapping comments) are kept around the substituted content
     * @return string Processed content; unchanged input if the marker is empty or does not occur twice
     */
    function substituteSubpart($content, $marker, $subpartContent, $recursive=1, $keepMarker=0) {
        $markerLen = strlen($marker);
        if (!$markerLen) return $content;

            // Explicit false-checks: a marker at offset 0 is valid, and a missing closing marker must abort.
        $start = strpos($content, $marker);
        if ($start === false) return $content;
        $stop = strpos($content, $marker, $start + $markerLen);
        if ($stop === false) return $content;
        $stop += $markerLen;

            // Code before: extend the start backwards over an opening comment wrapping the first marker
        $reg = array();
        $openLen = preg_match('/<!--[^>]*\z/', (string)substr($content, 0, $start), $reg) ? strlen($reg[0]) : 0;
        $start -= $openLen;
        $before_marker = '';
        if ($keepMarker) {
            $reg_k = array();
            if ($openLen && preg_match('/^[^>]*-->/', (string)substr($content, $start), $reg_k)) {
                $before_marker = $reg_k[0];
            } else {
                $before_marker = (string)substr($content, $start, $markerLen);
            }
        }
        $before = (string)substr($content, 0, $start);

            // Code after: extend the stop forwards over the closing part of a comment wrapping the second marker
        $reg = array();
        $closeLen = preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg) ? strlen($reg[0]) : 0;
        $stop += $closeLen;
        $after_marker = '';
        if ($keepMarker) {
            $reg_k = array();
            if ($closeLen && preg_match('/<!--[^<]*\z/', (string)substr($content, 0, $stop), $reg_k)) {
                $sLen = strlen($reg_k[0]);
            } else {
                $sLen = $markerLen;
            }
            $after_marker = (string)substr($content, $stop - $sLen, $sLen);
        }
        $after = (string)substr($content, $stop);

            // Replace?
        if (is_array($subpartContent)) {
            $substContent = $subpartContent[0].$this->getSubpart($content, $marker).$subpartContent[1];
        } else {
            $substContent = $subpartContent;
        }

            // Recursion passes $recursive and $keepMarker on, so later subparts are treated like the first one.
        if ($recursive && strpos($after, $marker) !== false) {
            $after = $this->substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker);
        }

        return $before.($keepMarker ? $before_marker : '').$substContent.($keepMarker ? $after_marker : '').$after;
    }
    // *******************************************'
    // COPY FROM class.tslib_content.php: / END
    // *******************************************'







    /************************************
     *
     * Parsing HTML code
     *
     ************************************/

    /**
     * Splits $content into an array where odd-indexed entries are blocks
     * (start tag, inner content, end tag) of the given tag names and
     * even-indexed entries are the content between those blocks.
     * Nested blocks of the listed tags stay inside their outer block.
     *
     * @param string Comma list of tag names to split by (case-insensitive)
     * @param string HTML content
     * @param boolean If set, end tags without a matching start tag are dropped
     * @return array Even entries: outside content. Odd entries: blocks.
     */
    function splitIntoBlock($tag, $content, $eliminateExtraEndTags=0) {
        $tags = array_unique(t3lib_div::trimExplode(',', $tag, 1));
        foreach ($tags as $tKey => $tagName) {
            $tags[$tKey] = preg_quote($tagName, '/');
        }
        $regexStr = '/<\/?(?:'.implode('|', $tags).')(?:>|[[:space:]][^>]*>)/i';

        $parts = preg_split($regexStr, $content);
        if (!is_array($parts)) $parts = array($content);

        $newParts = array();
        $pointer = strlen($parts[0]);
        $buffer = $parts[0];
        $nested = 0;
        foreach (array_slice($parts, 1) as $v) {
                // $pointer sits on the tag that preceded $v in $content
            $isEndTag = substr($content, $pointer, 2) == '</' ? 1 : 0;
            $tagLen = strcspn((string)substr($content, $pointer), '>') + 1;

            if (!$isEndTag) {    // We meet a start-tag:
                if (!$nested) {    // Ground level: previous buffer stored
                    $newParts[] = $buffer;
                    $buffer = '';
                }
                $nested++;    // We are inside now!
                $mbuffer = (string)substr($content, $pointer, strlen($v) + $tagLen);    // Tag plus following content
                $pointer += strlen($mbuffer);
                $buffer .= $mbuffer;
            } else {    // If we meet an endtag:
                $nested--;
                $eliminated = 0;
                if ($eliminateExtraEndTags && $nested < 0) {
                    $nested = 0;
                    $eliminated = 1;
                } else {
                    $buffer .= (string)substr($content, $pointer, $tagLen);    // Add the endtag to the current buffer
                }
                $pointer += $tagLen;
                if (!$nested && !$eliminated) {    // Back on ground level (and not by eliminating a tag)
                    $newParts[] = $buffer;
                    $buffer = '';
                }
                $mbuffer = (string)substr($content, $pointer, strlen($v));    // Content following the endtag
                $pointer += strlen($mbuffer);
                $buffer .= $mbuffer;
            }
        }
        $newParts[] = $buffer;
        return $newParts;
    }

    /**
     * Splits $content with splitIntoBlock() and recursively processes the
     * blocks, calling methods on $procObj for content and for tags.
     *
     * @param string Comma list of tag names to split by
     * @param string HTML content
     * @param object Object holding the callback methods (passed by reference)
     * @param string Name of the method called as $procObj->NAME($content, $level) for non-block content; skipped if empty
     * @param string Name of the method called as $procObj->NAME($tagsArray, $level) for each block; must return the (modified) array; skipped if empty
     * @param integer Nesting level, incremented for each recursion
     * @return string Processed content
     */
    function splitIntoBlockRecursiveProc($tag, $content, &$procObj, $callBackContent, $callBackTags, $level=0) {
        $parts = $this->splitIntoBlock($tag, $content, TRUE);
        foreach ($parts as $k => $v) {
            if ($k % 2) {
                $firstTagName = $this->getFirstTagName($v, TRUE);
                $tagsArray = array();
                $tagsArray['tag_start'] = $this->getFirstTag($v);
                $tagsArray['tag_end'] = '</'.$firstTagName.'>';
                $tagsArray['tag_name'] = strtolower($firstTagName);
                $tagsArray['add_level'] = 1;
                $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag, $this->removeFirstAndLastTag($v), $procObj, $callBackContent, $callBackTags, $level + $tagsArray['add_level']);

                if ($callBackTags) $tagsArray = $procObj->$callBackTags($tagsArray, $level);

                $parts[$k] =
                    $tagsArray['tag_start'].
                    $tagsArray['content'].
                    $tagsArray['tag_end'];
            } else {
                if ($callBackContent) $parts[$k] = $procObj->$callBackContent($parts[$k], $level);
            }
        }

        return implode('', $parts);
    }

    /**
     * Splits $content on single (start) tags of the given names.
     *
     * @param string Comma list of tag names (case-insensitive)
     * @param string HTML content
     * @return array Even entries: content outside the tags. Odd entries: the tags themselves.
     */
    function splitTags($tag, $content) {
        $tags = t3lib_div::trimExplode(',', $tag, 1);
        foreach ($tags as $tKey => $tagName) {
            $tags[$tKey] = preg_quote($tagName, '/');
        }
        $regexStr = '/<(?:'.implode('|', $tags).')(?:>|\/>|[[:space:]][^>]*>)/i';
        $parts = preg_split($regexStr, $content);
        if (!is_array($parts)) $parts = array($content);

        $pointer = strlen($parts[0]);
        $newParts = array();
        $newParts[] = $parts[0];
        foreach (array_slice($parts, 1) as $v) {
            $tagLen = strcspn((string)substr($content, $pointer), '>') + 1;

                // Set tag:
            $tagString = (string)substr($content, $pointer, $tagLen);
            $newParts[] = $tagString;
            $pointer += strlen($tagString);

                // Set content:
            $newParts[] = $v;
            $pointer += strlen($v);
        }
        return $newParts;
    }

    /**
     * Picks either the odd (tag/block) or the even (content) entries from an
     * array as returned by splitIntoBlock() / splitTags().
     *
     * @param array Parts array
     * @param boolean If set, the odd entries are returned, otherwise the even ones
     * @param boolean If not set, the first and last tag is stripped from each returned entry
     * @return array Selected entries, re-indexed from 0
     */
    function getAllParts($parts, $tag_parts=1, $include_tag=1) {
        $newParts = array();
        foreach ($parts as $k => $v) {
            if (($k + ($tag_parts ? 0 : 1)) % 2) {
                if (!$include_tag) $v = $this->removeFirstAndLastTag($v);
                $newParts[] = $v;
            }
        }
        return $newParts;
    }

    /**
     * Removes everything up to and including the first ">" and everything
     * from the last "<" onwards.
     *
     * @param string String to process
     * @return string The inner part; empty if no "<" is left after removing the first tag
     */
    function removeFirstAndLastTag($str) {
            // First:
        $str = (string)substr($str, strcspn($str, '>') + 1);
            // Last:
        $lastLt = strrpos($str, '<');
        if ($lastLt === false) return '';
        return (string)substr($str, 0, $lastLt);
    }

    /**
     * Returns the input string up to and including the first ">".
     *
     * @param string HTML string expected to begin with a tag
     * @return string The first tag
     */
    function getFirstTag($str) {
        $endLen = strcspn($str, '>') + 1;
        return substr($str, 0, $endLen);
    }

    /**
     * Returns the name of the first tag in $str.
     *
     * @param string HTML string expected to begin with a tag
     * @param boolean If set, the case is preserved; otherwise the name is upper-cased
     * @return string Tag name
     */
    function getFirstTagName($str, $preserveCase=FALSE) {
        $inner = (string)substr(trim($this->getFirstTag($str)), 1, -1);
        $pieces = preg_split('/[[:space:]]/', $inner, 2);
        $tag = is_array($pieces) ? $pieces[0] : '';
        if (!$preserveCase) $tag = strtoupper($tag);

        return trim($tag);
    }

    /**
     * Parses the attributes of a tag.
     *
     * @param string Tag (or attribute string only)
     * @param boolean If set, values are passed through t3lib_div::htmlspecialchars_decode()
     * @return array [0] attribute name (lowercase) => value. [1] attribute name => array with 'origTag' (name as written) and, if a value was given, 'dashType' (quote character or '')
     */
    function get_tag_attributes($tag, $deHSC=0) {
        list($components, $metaC) = $this->split_tag_attributes($tag);
        $name = '';    // attribute name is stored here
        $valuemode = false;
        $attributes = array();
        $attributesMeta = array();

        foreach ($components as $key => $val) {
                // A "=" only acts as assignment operator if it was not a quoted value (attr="=")
            $isOperator = !strcmp($val, '=') && !strcmp($metaC[$key], '');
            if ($isOperator) {
                $valuemode = true;
                continue;
            }

            if ($valuemode) {    // A value is only assigned if an attribute name is waiting for it
                if ($name) {
                    $attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
                    $attributesMeta[$name]['dashType'] = $metaC[$key];
                    $name = '';
                }
            } else {
                $namekey = preg_replace('/[^a-zA-Z0-9_:-]/', '', $val);
                if ($namekey) {
                    $name = strtolower($namekey);
                    $attributesMeta[$name] = array();
                    $attributesMeta[$name]['origTag'] = $namekey;
                    $attributes[$name] = '';
                }
            }
            $valuemode = false;
        }

        return array($attributes, $attributesMeta);
    }

    /**
     * Splits the attribute part of a tag into a flat list of components:
     * attribute names, "=" signs and values.
     *
     * @param string Tag (a leading "<tagname" and a trailing ">" are removed)
     * @return array [0] list of components. [1] parallel list with the quote character for quoted values, '' otherwise
     */
    function split_tag_attributes($tag) {
        $tag_tmp = trim(preg_replace('/^<[^[:space:]]*/', '', trim($tag)));
            // Removes any > in the end of the string
        $tag_tmp = trim(preg_replace('/>\z/', '', $tag_tmp));

        $metaValue = array();
        $value = array();
        while (strcmp($tag_tmp, '')) {
            $firstChar = substr($tag_tmp, 0, 1);
            if (!strcmp($firstChar, '"') || !strcmp($firstChar, "'")) {
                $reg = explode($firstChar, $tag_tmp, 3);
                $value[] = isset($reg[1]) ? $reg[1] : '';
                $metaValue[] = $firstChar;
                $tag_tmp = trim(isset($reg[2]) ? $reg[2] : '');    // Unterminated quote: nothing left
            } elseif (!strcmp($firstChar, '=')) {
                $value[] = '=';
                $metaValue[] = '';
                $tag_tmp = trim((string)substr($tag_tmp, 1));    // Removes = chars.
            } else {
                    // There are no quotes around the value. We look for the next whitespace or '='
                $reg = preg_split('/[[:space:]=]/', $tag_tmp, 2);
                if (!is_array($reg) || !strlen($reg[0])) {
                        // First char is a separator which trim() does not strip (eg. form feed).
                        // Skip it, otherwise $tag_tmp would never get shorter.
                    $tag_tmp = trim((string)substr($tag_tmp, 1));
                    continue;
                }
                $value[] = trim($reg[0]);
                $metaValue[] = '';
                $tag_tmp = trim(substr($tag_tmp, strlen($reg[0]), 1).(isset($reg[1]) ? $reg[1] : ''));
            }
        }

        return array($value, $metaValue);
    }

    /**
     * Counts start- and end-tags in $content (compared in lowercase) and
     * reports mismatches.
     *
     * @param string HTML content
     * @param string Comma list of tags which must have an end tag
     * @param string Comma list of tags which must not have an end tag
     * @return array Keys: 'counts', 'errors', 'warnings', 'blocks', 'solo'
     */
    function checkTagTypeCounts($content, $blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
        $content = strtolower($content);
        $analyzedOutput = array();
        $analyzedOutput['counts'] = array();    // Counts appearances of start-tags
        $analyzedOutput['errors'] = array();    // Lists ERRORS
        $analyzedOutput['warnings'] = array();    // Lists warnings.
        $analyzedOutput['blocks'] = array();    // Lists stats for block-tags
        $analyzedOutput['solo'] = array();    // Lists stats for solo-tags

            // Block tags, must have endings...
        foreach (explode(',', $blockTags) as $tagName) {
            list($countBegin, $countEnd) = $this->_countTagOccurrences($content, $tagName);
            $analyzedOutput['blocks'][$tagName] = array($countBegin, $countEnd, $countBegin - $countEnd);
            if ($countBegin) $analyzedOutput['counts'][$tagName] = $countBegin;
            if ($countBegin - $countEnd) {
                if ($countBegin - $countEnd > 0) {
                    $analyzedOutput['errors'][$tagName] = 'There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
                } else {
                    $analyzedOutput['warnings'][$tagName] = 'There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
                }
            }
        }

            // Solo tags, must NOT have endings...
        foreach (explode(',', $soloTags) as $tagName) {
            list($countBegin, $countEnd) = $this->_countTagOccurrences($content, $tagName);
            $analyzedOutput['solo'][$tagName] = array($countBegin, $countEnd);
            if ($countBegin) $analyzedOutput['counts'][$tagName] = $countBegin;
            if ($countEnd) {
                $analyzedOutput['warnings'][$tagName] = 'There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
            }
        }

        return $analyzedOutput;
    }

    /**
     * Helper for checkTagTypeCounts(): counts "<name" and "</name" occurrences
     * which are followed by a non-alphanumeric character.
     *
     * @param string Content (already lower-cased by the caller)
     * @param string Tag name
     * @return array array(number of start tags, number of end tags)
     */
    function _countTagOccurrences($content, $tagName) {
        $quoted = preg_quote($tagName, '/');
        $found = array();
        $countBegin = (int)preg_match_all('/<'.$quoted.'[^[:alnum:]]/', $content, $found);
        $countEnd = (int)preg_match_all('/<\/'.$quoted.'[^[:alnum:]]/', $content, $found);
        return array($countBegin, $countEnd);
    }












    /*********************************
     *
     * Clean HTML code
     *
     *********************************/

    /**
     * Cleans HTML by walking through all tags and keeping, altering,
     * protecting or removing them according to $tags.
     *
     * Per-tag configuration (when $tags[tagname] is an array) as read by this
     * method: 'overrideAttribs', 'allowedAttribs', 'fixAttrib', 'protect',
     * 'remap', 'rmTagIfNoAttrib', 'nesting'.
     *
     * @param string HTML content
     * @param array  Tag names (lowercase) as keys. A non-array value keeps the tag as is; an array value holds processing instructions
     * @param mixed  What to do with tags not in $tags: false => removed, "protect" => kept with < > converted to &lt; &gt;, other true value => kept
     * @param integer Direction passed to bidir_htmlspecialchars() for all non-tag content (0 = leave untouched)
     * @param array  Additional configuration passed on to processTag() / processContent()
     * @return string Processed HTML
     */
    function HTMLcleaner($content, $tags=array(), $keepAll=0, $hSC=0, $addConfig=array()) {
        $newContent = array();
        $tokArr = explode('<', $content);
        $newContent[] = $this->processContent(array_shift($tokArr), $hSC, $addConfig);

        $c = 1;
        $tagRegister = array();    // tagname => stack of $newContent positions of still open start tags
        $tagStack = array();    // open tags with 'nesting' = global, in order of appearance
        foreach ($tokArr as $tok) {
            $firstChar = (string)substr($tok, 0, 1);

                // It is only a tag if the first char is alphanumeric or "/". This also avoids triggering on <?xml..> and <!DOCTYPE..>
            if (!preg_match('/[[:alnum:]\/]/', $firstChar)) {
                $newContent[$c++] = $this->processContent('<'.$tok, $hSC, $addConfig);    // It was not a tag anyways
                continue;
            }
            $tagEnd = strcspn($tok, '>');
            if (strlen($tok) == $tagEnd) {
                $newContent[$c++] = $this->processContent('<'.$tok, $hSC, $addConfig);    // There was no end-bracket, so no tag...
                continue;
            }

            $endTag = $firstChar == '/' ? 1 : 0;
            $tagContent = (string)substr($tok, $endTag, $tagEnd - $endTag);
            $tagParts = preg_split('/[[:space:]]/', $tagContent, 2);
            if (!is_array($tagParts)) $tagParts = array($tagContent);
            if (!isset($tagParts[1])) $tagParts[1] = '';
            $tagName = strtolower($tagParts[0]);

            if (isset($tags[$tagName])) {
                if (is_array($tags[$tagName])) {    // If there is processing to do for the tag:
                    $tagConf = $tags[$tagName];

                    if (!$endTag) {    // If NOT an endtag, do attribute processing
                        $tagParts[1] = $this->_processTagAttributes($tagName, $tagParts[1], $tagConf);
                    } else {    // If endTag, remove any possible attributes:
                        $tagParts[1] = '';
                    }

                        // Protecting the tag by converting < and > to &lt; and &gt;
                    if (!empty($tagConf['protect'])) {
                        $lt = '&lt;';    $gt = '&gt;';
                    } else {
                        $lt = '<';    $gt = '>';
                    }
                        // Remapping tag name?
                    if (!empty($tagConf['remap'])) $tagParts[0] = $tagConf['remap'];

                        // rmTagIfNoAttrib
                    if ($endTag || trim($tagParts[1]) || empty($tagConf['rmTagIfNoAttrib'])) {
                        $setTag = 1;

                        if (!empty($tagConf['nesting'])) {
                            $globalNesting = !strcmp((string)$tagConf['nesting'], 'global');
                            if (!isset($tagRegister[$tagName]) || !is_array($tagRegister[$tagName])) $tagRegister[$tagName] = array();

                            if ($endTag) {
                                $correctTag = 1;
                                if ($globalNesting) {
                                    $lastEl = (string)end($tagStack);
                                    if (strcmp($tagName, $lastEl)) {
                                        if (in_array($tagName, $tagStack)) {
                                                // Remove all start tags opened after the matching one
                                            while (count($tagStack) && strcmp($tagName, $lastEl)) {
                                                $elPos = end($tagRegister[$lastEl]);
                                                unset($newContent[$elPos]);

                                                array_pop($tagRegister[$lastEl]);
                                                array_pop($tagStack);
                                                $lastEl = (string)end($tagStack);
                                            }
                                        } else {
                                            $correctTag = 0;    // The end tag has no start tag on the stack
                                        }
                                    }
                                }
                                if (!count($tagRegister[$tagName]) || !$correctTag) {
                                    $setTag = 0;
                                } else {
                                    array_pop($tagRegister[$tagName]);
                                    if ($globalNesting) array_pop($tagStack);
                                }
                            } else {
                                array_push($tagRegister[$tagName], $c);
                                if ($globalNesting) array_push($tagStack, $tagName);
                            }
                        }

                        if ($setTag) {
                                // Setting the tag. Protected tags are not processed further.
                            $newContent[$c++] = $this->processTag($lt.($endTag ? '/' : '').trim($tagParts[0].' '.$tagParts[1]).$gt, $addConfig, $endTag, $lt == '&lt;');
                        }
                    }
                } else {
                    $newContent[$c++] = $this->processTag('<'.($endTag ? '/' : '').$tagContent.'>', $addConfig, $endTag);
                }
            } elseif ($keepAll) {    // This is if the tag was not defined in the array for processing:
                if (!strcmp((string)$keepAll, 'protect')) {
                    $lt = '&lt;';    $gt = '&gt;';
                } else {
                    $lt = '<';    $gt = '>';
                }
                $newContent[$c++] = $this->processTag($lt.($endTag ? '/' : '').$tagContent.$gt, $addConfig, $endTag, $lt == '&lt;');
            }
            $newContent[$c++] = $this->processContent((string)substr($tok, $tagEnd + 1), $hSC, $addConfig);
        }

            // Unsetting start tags which were never closed:
        foreach ($tagRegister as $positions) {
            foreach ($positions as $pKey) {
                unset($newContent[$pKey]);
            }
        }

        return implode('', $newContent);
    }

    /**
     * Helper for HTMLcleaner(): applies 'overrideAttribs', 'allowedAttribs'
     * and 'fixAttrib' of a tag configuration to the attribute string of a
     * start tag.
     *
     * @param string Tag name (lowercase)
     * @param string Attribute string of the tag
     * @param array  Configuration for the tag
     * @return string New attribute string
     */
    function _processTagAttributes($tagName, $attribString, $tagConf) {
            // Override attributes
        if (isset($tagConf['overrideAttribs']) && strcmp((string)$tagConf['overrideAttribs'], '')) {
            $attribString = $tagConf['overrideAttribs'];
        }

            // Allowed attributes
        if (isset($tagConf['allowedAttribs']) && strcmp((string)$tagConf['allowedAttribs'], '')) {
            if (!strcmp((string)$tagConf['allowedAttribs'], '0')) {    // No attribs allowed
                $attribString = '';
            } elseif (trim($attribString)) {
                $tagAttrib = $this->get_tag_attributes($attribString);
                $newTagAttrib = array();
                $tList = t3lib_div::trimExplode(',', strtolower($tagConf['allowedAttribs']), 1);
                foreach ($tList as $allowTag) {
                    if (isset($tagAttrib[0][$allowTag])) $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                }
                $attribString = $this->compileTagAttribs($newTagAttrib, $tagAttrib[1]);
            }
        }

            // Fixed attrib values
        if (isset($tagConf['fixAttrib']) && is_array($tagConf['fixAttrib'])) {
            $fixDefaults = array(
                'set' => '', 'unset' => '', 'default' => '', 'always' => 0,
                'trim' => 0, 'intval' => 0, 'lower' => 0, 'upper' => 0,
                'range' => '', 'list' => '', 'casesensitiveComp' => 0,
                'removeIfFalse' => '', 'removeIfEquals' => '',
                'prefixLocalAnchors' => 0, 'prefixRelPathWith' => '', 'userFunc' => ''
            );
            $tagAttrib = $this->get_tag_attributes($attribString);
            foreach ($tagConf['fixAttrib'] as $attr => $params) {
                if (!is_array($params)) continue;
                $params = array_merge($fixDefaults, $params);

                if (strlen($params['set'])) $tagAttrib[0][$attr] = $params['set'];
                if (strlen($params['unset'])) unset($tagAttrib[0][$attr]);
                if (strcmp($params['default'], '') && !isset($tagAttrib[0][$attr])) $tagAttrib[0][$attr] = $params['default'];
                if ($params['always'] || isset($tagAttrib[0][$attr])) {
                    if ($params['trim']) {$tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);}
                    if ($params['intval']) {$tagAttrib[0][$attr] = intval($tagAttrib[0][$attr]);}
                    if ($params['lower']) {$tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);}
                    if ($params['upper']) {$tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);}
                    if ($params['range']) {
                        if (isset($params['range'][1])) {
                            $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr], intval($params['range'][0]), intval($params['range'][1]));
                        } else {
                            $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr], intval($params['range'][0]));
                        }
                    }
                    if (is_array($params['list'])) {
                            // Cache key includes the attribute so two attributes of one tag do not share a list
                        if (!in_array($this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']), $this->caseShift($params['list'], $params['casesensitiveComp'], $tagName.':'.$attr))) $tagAttrib[0][$attr] = $params['list'][0];
                    }
                    $removeBlank = !strcmp((string)$params['removeIfFalse'], 'blank');
                    if (($params['removeIfFalse'] && !$removeBlank && !$tagAttrib[0][$attr]) || ($removeBlank && !strcmp($tagAttrib[0][$attr], ''))) {
                        unset($tagAttrib[0][$attr]);
                    }
                    if (strcmp($params['removeIfEquals'], '') && !strcmp($this->caseShift($tagAttrib[0][$attr], $params['casesensitiveComp']), $this->caseShift($params['removeIfEquals'], $params['casesensitiveComp']))) {
                        unset($tagAttrib[0][$attr]);
                    }
                    if ($params['prefixLocalAnchors']) {
                        if (substr($tagAttrib[0][$attr], 0, 1) == '#') {
                            $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
                            $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
                            if ($params['prefixLocalAnchors'] == 2 && t3lib_div::isFirstPartOfStr($prefix, t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
                                $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr], strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
                            }
                        }
                    }
                    if ($params['prefixRelPathWith']) {
                        $urlParts = parse_url($tagAttrib[0][$attr]);
                        if (empty($urlParts['scheme']) && (!isset($urlParts['path']) || substr($urlParts['path'], 0, 1) != '/')) {    // If it is NOT an absolute URL (by http: or starting "/")
                            $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
                        }
                    }
                    if ($params['userFunc']) {
                        $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'], $tagAttrib[0][$attr], $this);
                    }
                }
            }
            $attribString = $this->compileTagAttribs($tagAttrib[0], $tagAttrib[1]);
        }

        return $attribString;
    }

    /**
     * Converts HTML special characters in one of several directions.
     *
     * @param string Input value
     * @param integer 1: htmlspecialchars(). 2: htmlspecialchars() followed by t3lib_div::deHSCentities(). -1: converts &gt; &lt; &quot; &amp; back to characters. Other values: no change
     * @return string Converted value
     */
    function bidir_htmlspecialchars($value, $dir) {
        if ($dir == 1) {
            $value = htmlspecialchars($value);
        } elseif ($dir == 2) {
            $value = t3lib_div::deHSCentities(htmlspecialchars($value));
        } elseif ($dir == -1) {
                // &amp; is decoded last so "&amp;lt;" becomes "&lt;" and not "<"
            $value = str_replace('&gt;', '>', $value);
            $value = str_replace('&lt;', '<', $value);
            $value = str_replace('&quot;', '"', $value);
            $value = str_replace('&amp;', '&', $value);
        }
        return $value;
    }

    /**
     * Prefixes relative resource paths in tag attributes (background, src,
     * href, action) and all url() references inside <style> blocks.
     *
     * @param string Default prefix
     * @param string HTML content
     * @param array  Alternative prefixes; keys are upper-case tag names, plus the lower-case key "style" for <style> blocks
     * @param string Suffix appended to each prefixed path
     * @return string Processed content
     */
    function prefixResourcePath($main_prefix, $content, $alternatives=array(), $suffix='') {
            // Which attribute holds the resource path for which tag
        $attribByTag = array(
            'td' => 'background', 'body' => 'background', 'table' => 'background',
            'img' => 'src', 'input' => 'src', 'script' => 'src', 'embed' => 'src',
            'link' => 'href', 'a' => 'href',
            'form' => 'action'
        );

        $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a', $content);
        foreach ($parts as $k => $v) {
            if (!($k % 2)) continue;

            $params = $this->get_tag_attributes($v, 1);
            $tagEnd = substr($v, -2) == '/>' ? ' />' : '>';    // Detect tag-ending so that it is re-applied correctly.
            $firstTagName = $this->getFirstTagName($v);    // The 'name' of the first tag (upper-case)
            $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;

            $lowerName = strtolower($firstTagName);
            if (!isset($attribByTag[$lowerName])) continue;
            $attribName = $attribByTag[$lowerName];

            if (!empty($params[0][$attribName])) {
                $params[0][$attribName] = $this->prefixRelPath($prefix, $params[0][$attribName], $suffix);
                $parts[$k] = '<'.trim($lowerName.' '.$this->compileTagAttribs($params[0], $params[1])).$tagEnd;
            }
        }
        $content = implode('', $parts);

            // Fix <style> section:
        $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
        if (strlen($prefix)) {
            $replacement = '${1}'.$this->_quoteReplacement($prefix).'${2}'.$this->_quoteReplacement($suffix).'${3}';
            $parts = $this->splitIntoBlock('style', $content);
            foreach ($parts as $k => $v) {
                if ($k % 2) {
                    $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i', $replacement, $v);
                }
            }
            $content = implode('', $parts);
        }

        return $content;
    }

    /**
     * Helper: escapes "\" and "$" so a string is inserted literally when used
     * inside a preg_replace() replacement.
     *
     * @param string Literal text
     * @return string Text safe for use in a replacement string
     */
    function _quoteReplacement($str) {
        return str_replace(array('\\', '$'), array('\\\\', '\\$'), (string)$str);
    }

    /**
     * Prefixes (and suffixes) a path unless it has a URL scheme or starts
     * with "/".
     *
     * @param string Prefix
     * @param string Path / URL
     * @param string Suffix
     * @return string Resulting path
     */
    function prefixRelPath($prefix, $srcVal, $suffix='') {
        $pU = parse_url($srcVal);
        $hasScheme = is_array($pU) && !empty($pU['scheme']);
        if (!$hasScheme && substr($srcVal, 0, 1) != '/') {    // If not an absolute URL.
            $srcVal = $prefix.$srcVal.$suffix;
        }
        return $srcVal;
    }

    /**
     * Cleans up <font> tags, keeping only the attributes selected. Font tags
     * left without attributes are removed (their content is kept).
     *
     * @param string HTML content
     * @param boolean Keep "face" attribute
     * @param boolean Keep "size" attribute
     * @param boolean Keep "color" attribute
     * @return string Processed content
     */
    function cleanFontTags($value, $keepFace=0, $keepSize=0, $keepColor=0) {
        $fontSplit = $this->splitIntoBlock('font', $value);    // Depends on end-tags being properly set
        foreach ($fontSplit as $k => $v) {
            if (!($k % 2)) continue;

            $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
            $newAttribs = array();
            if ($keepFace && !empty($attribArray['face'])) $newAttribs[] = 'face="'.$attribArray['face'].'"';
            if ($keepSize && !empty($attribArray['size'])) $newAttribs[] = 'size="'.$attribArray['size'].'"';
            if ($keepColor && !empty($attribArray['color'])) $newAttribs[] = 'color="'.$attribArray['color'].'"';

            $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v), $keepFace, $keepSize, $keepColor);
            if (count($newAttribs)) {
                $fontSplit[$k] = '<font '.implode(' ', $newAttribs).'>'.$innerContent.'</font>';
            } else {
                $fontSplit[$k] = $innerContent;
            }
        }
        return implode('', $fontSplit);
    }

    /**
     * Renames tags (case-insensitively). Attributes of start tags are kept,
     * anything inside end tags is dropped.
     *
     * @param string HTML content
     * @param array  From-tag => to-tag pairs. The from-tag is used as part of a regular expression.
     * @param string Character(s) which open the tags to search for
     * @param string Character(s) which open the tags written back
     * @return string Processed content
     */
    function mapTags($value, $tags=array(), $ltChar='<', $ltChar2='<') {
        $ltPattern = preg_quote($ltChar, '/');
        $ltReplace = $this->_quoteReplacement($ltChar2);

        foreach ($tags as $from => $to) {
            $fromPattern = str_replace('/', '\/', (string)$from);    // Only the delimiter needs protection
            $toReplace = $this->_quoteReplacement($to);

            $value = preg_replace('/'.$ltPattern.$fromPattern.'>/i', $ltReplace.$toReplace.'>', $value);
            $value = preg_replace('/'.$ltPattern.$fromPattern.'[[:space:]]([^>]*)>/i', $ltReplace.$toReplace.' ${1}>', $value);
            $value = preg_replace('/'.$ltPattern.'\/'.$fromPattern.'[^>]*>/i', $ltReplace.'/'.$toReplace.'>', $value);
        }
        return $value;
    }

    /**
     * Converts protected tags (&lt;tag&gt;) back into real tags.
     *
     * @param string Content with protected tags
     * @param string Comma list of tag names to unprotect. If empty, all tags are unprotected.
     * @return string Processed content
     */
    function unprotectTags($content, $tagList='') {
        $tagsArray = t3lib_div::trimExplode(',', $tagList, 1);
        $contentParts = explode('&lt;', $content);
        foreach ($contentParts as $k => $tok) {
            if ($k === 0) continue;    // Bypass the first part, it precedes the first "&lt;"

            $unprotected = false;
            $firstChar = (string)substr($tok, 0, 1);
            if (strcmp(trim($firstChar), '')) {
                $subparts = explode('&gt;', $tok, 2);
                $tagEnd = strlen($subparts[0]);
                if (strlen($tok) != $tagEnd) {    // There is a closing "&gt;"
                    $endTag = $firstChar == '/' ? 1 : 0;
                    $tagContent = (string)substr($tok, $endTag, $tagEnd - $endTag);
                    $tagParts = preg_split('/[[:space:]]/', $tagContent, 2);
                    $tagName = strtolower($tagParts[0]);
                    if (!strcmp($tagList, '') || in_array($tagName, $tagsArray)) {
                        $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
                        $unprotected = true;
                    }
                }
            }
            if (!$unprotected) $contentParts[$k] = '&lt;'.$tok;
        }

        return implode('', $contentParts);
    }

    /**
     * Strips all tags except those listed. The listed tags are temporarily
     * renamed to "_<md5 of tag name>" while strip_tags() runs.
     *
     * @param string HTML content
     * @param string Comma list of tag names to keep
     * @return string Processed content
     */
    function stripTagsExcept($value, $tagList) {
        $tags = t3lib_div::trimExplode(',', $tagList, 1);
        $forthArr = array();
        $backArr = array();
        foreach ($tags as $theTag) {
            $hash = md5($theTag);
            $forthArr[$theTag] = $hash;
            $backArr[$hash] = $theTag;
        }
        $value = $this->mapTags($value, $forthArr, '<', '_');
        $value = strip_tags($value);
        $value = $this->mapTags($value, $backArr, '_', '<');
        return $value;
    }

    /**
     * Prepares a string or a list of strings for comparison: upper-cased for
     * case-insensitive comparison, untouched for case-sensitive comparison.
     *
     * @param mixed  String or array of strings
     * @param boolean If set, the comparison is case-sensitive and the input is returned unchanged
     * @param string Cache key under which an upper-cased array is stored and looked up in $this->caseShift_cache; no caching if empty
     * @return mixed Same type as the input
     */
    function caseShift($str, $flag, $cacheKey='') {
        if ($flag) return $str;    // Case-sensitive: arrays and strings alike stay as they are

        if (is_array($str)) {
            if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey])) {
                foreach ($str as $k => $v) {
                    $str[$k] = strtoupper($v);
                }
                if ($cacheKey) $this->caseShift_cache[$cacheKey] = $str;
            } else {
                $str = $this->caseShift_cache[$cacheKey];
            }
        } else {
            $str = strtoupper($str);
        }
        return $str;
    }

    /**
     * Builds an attribute string from the arrays returned by
     * get_tag_attributes().
     *
     * @param array   Attribute name => value
     * @param array   Meta information per attribute ('origTag', 'dashType')
     * @param boolean If set, names are lower-cased and values are always double-quoted and passed through htmlspecialchars()
     * @return string Attribute string
     */
    function compileTagAttribs($tagAttrib, $meta=array(), $xhtmlClean=0) {
        $accu = array();
        foreach ($tagAttrib as $k => $v) {
            $hasDashType = isset($meta[$k]['dashType']);
            if ($xhtmlClean) {
                $attr = strtolower($k);
                if (strcmp($v, '') || $hasDashType) {
                    $attr .= '="'.htmlspecialchars($v).'"';
                }
            } else {
                $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
                if (strcmp($v, '') || $hasDashType) {
                        // Keep the original quote char; without one, integers stay unquoted
                    $dash = !empty($meta[$k]['dashType']) ? $meta[$k]['dashType'] : (t3lib_div::testInt($v) ? '' : '"');
                    $attr .= '='.$dash.$v.$dash;
                }
            }
            $accu[] = $attr;
        }
        return implode(' ', $accu);
    }

    /**
     * Like get_tag_attributes(), but returns only the name => value array.
     *
     * @param string Tag
     * @param boolean If set, values are passed through t3lib_div::htmlspecialchars_decode()
     * @return array Attribute name => value
     */
    function get_tag_attributes_classic($tag, $deHSC=0) {
        $attr = $this->get_tag_attributes($tag, $deHSC);
        return is_array($attr[0]) ? $attr[0] : array();
    }

    /**
     * Indents every line of $content. Carriage returns are removed.
     *
     * @param string Content
     * @param integer Number of indent strings to prepend per line
     * @param string Indent string
     * @return string Indented content, lines joined by line feeds
     */
    function indentLines($content, $number=1, $indentChar="\t") {
        $preTab = $number > 0 ? str_repeat($indentChar, $number) : '';
        $lines = explode(chr(10), str_replace(chr(13), '', $content));
        foreach ($lines as $k => $v) {
            $lines[$k] = $preTab.$v;
        }
        return implode(chr(10), $lines);
    }

    /**
     * Converts a TSconfig-style array into the arguments for HTMLcleaner().
     *
     * @param array TSconfig. Keys read: allowTags, tags., localNesting, globalNesting, rmTagIfNoAttrib, noAttrib, removeTags, xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars
     * @param array Tag configuration to start from; overrides tags from 'allowTags'
     * @return array array($tags, $keepAll, $hSC, $addConfig) matching parameters 2-5 of HTMLcleaner()
     */
    function HTMLparserConfig($TSconfig, $keepTags=array()) {
            // Allow tags (base list, merged with incoming array)
        $allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
        $alTags = array_flip(t3lib_div::trimExplode(',', strtolower($allowTags), 1));
        $keepTags = array_merge($alTags, $keepTags);

            // Set config properties.
        if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
                // First the plain on/off switches ...
            foreach ($TSconfig['tags.'] as $key => $tagC) {
                if (!is_array($tagC) && !strcmp($key, strtolower($key))) {
                    if (!strcmp($tagC, '0')) unset($keepTags[$key]);
                    if (!strcmp($tagC, '1') && !isset($keepTags[$key])) $keepTags[$key] = 1;
                }
            }

                // ... then the per-tag property arrays ("tagname.")
            foreach ($TSconfig['tags.'] as $key => $tagC) {
                if (!is_array($tagC) || strcmp($key, strtolower($key))) continue;

                $key = substr($key, 0, -1);
                if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) $keepTags[$key] = array();

                if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                    foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                        if (!is_array($atConfig)) continue;

                        $atName = substr($atName, 0, -1);
                        if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName])) {
                            $keepTags[$key]['fixAttrib'][$atName] = array();
                        }
                        $merged = array_merge($keepTags[$key]['fixAttrib'][$atName], $atConfig);
                            // 'range' and 'list' arrive as comma lists; values which are already arrays are left alone
                        foreach (array('range', 'list') as $listProp) {
                            if (isset($merged[$listProp]) && !is_array($merged[$listProp]) && strcmp($merged[$listProp], '')) {
                                $merged[$listProp] = t3lib_div::trimExplode(',', $merged[$listProp]);
                            }
                        }
                        $keepTags[$key]['fixAttrib'][$atName] = $merged;
                    }
                }
                unset($tagC['fixAttrib.']);
                unset($tagC['fixAttrib']);
                $keepTags[$key] = array_merge($keepTags[$key], $tagC);
            }
        }

            // Properties given as comma lists of tag names; only applied to tags already kept
        if (!empty($TSconfig['localNesting'])) {
            $this->_setPropertyForTags($keepTags, $TSconfig['localNesting'], 'nesting', 1);
        }
        if (!empty($TSconfig['globalNesting'])) {
            $this->_setPropertyForTags($keepTags, $TSconfig['globalNesting'], 'nesting', 'global');
        }
        if (!empty($TSconfig['rmTagIfNoAttrib'])) {
            $this->_setPropertyForTags($keepTags, $TSconfig['rmTagIfNoAttrib'], 'rmTagIfNoAttrib', 1);
        }
        if (!empty($TSconfig['noAttrib'])) {
            $this->_setPropertyForTags($keepTags, $TSconfig['noAttrib'], 'allowedAttribs', 0);
        }
        if (!empty($TSconfig['removeTags'])) {
                // No attributes allowed + remove tag if it has no attributes => tag is always removed
            $lN = t3lib_div::trimExplode(',', strtolower($TSconfig['removeTags']), 1);
            foreach ($lN as $tn) {
                $keepTags[$tn] = array();
                $keepTags[$tn]['allowedAttribs'] = 0;
                $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
            }
        }

            // Create additional configuration:
        $addConfig = array();
        if (!empty($TSconfig['xhtml_cleaning'])) {
            $addConfig['xhtml'] = 1;
        }

        return array(
            $keepTags,
            ''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
            intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
            $addConfig
        );
    }

    /**
     * Helper for HTMLparserConfig(): sets one property on every already kept
     * tag named in a comma list. Scalar entries are turned into arrays first.
     *
     * @param array  Tag configuration (modified by reference)
     * @param string Comma list of tag names
     * @param string Property name
     * @param mixed  Property value
     * @return void
     */
    function _setPropertyForTags(&$keepTags, $tagList, $property, $value) {
        $names = t3lib_div::trimExplode(',', strtolower($tagList), 1);
        foreach ($names as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) $keepTags[$tn] = array();
                $keepTags[$tn][$property] = $value;
            }
        }
    }

    /**
     * Runs HTMLcleaner() with all tags kept and XHTML processing of tags
     * enabled (see processTag()).
     *
     * @param string HTML content
     * @return string Processed content
     */
    function XHTML_clean($content) {
        return $this->HTMLcleaner(
            $content,
            array(),    // No tags treated specially
            1,    // Keep ALL tags.
            0,    // Content is not passed through htmlspecialchars() - <script> content would break
            array('xhtml' => 1)
        );
    }

    /**
     * Post-processes a single tag. With $conf['xhtml'] set: end tags are
     * lower-cased; for start tags the name and attribute names are
     * lower-cased, values double-quoted, "alt" is added to img, "type" to
     * script, and standalone tags are closed with " />".
     *
     * @param string  The tag, including < and >
     * @param array   Configuration; only the key 'xhtml' is read
     * @param boolean Whether the tag is an end tag
     * @param boolean If set, the tag is protected and returned unchanged
     * @return string Processed tag
     */
    function processTag($value, $conf, $endTag, $protected=0) {
            // Return immediately if protected or no parameters
        if ($protected || !is_array($conf) || !count($conf)) return $value;

            // OK then, begin processing for XHTML output:
            // STILL VERY EXPERIMENTAL!!
        if (!empty($conf['xhtml'])) {
            if ($endTag) {    // Endtags are just set lowercase right away
                $value = strtolower($value);
            } elseif (substr($value, 0, 2) != '<!') {    // ... and comments are ignored.
                $inValue = (string)substr($value, 1, (substr($value, -2) == '/>' ? -2 : -1));    // Inner value without < >
                $pieces = preg_split('/[[:space:]]/', $inValue, 2);    // Separate attributes and tagname
                $tagName = strtolower($pieces[0]);
                $tagP = isset($pieces[1]) ? $pieces[1] : '';

                    // Process attributes
                $tagAttrib = $this->get_tag_attributes($tagP);
                if (!strcmp($tagName, 'img') && !isset($tagAttrib[0]['alt'])) $tagAttrib[0]['alt'] = '';    // Set alt attribute for all images
                if (!strcmp($tagName, 'script') && !isset($tagAttrib[0]['type'])) $tagAttrib[0]['type'] = 'text/javascript';    // Set type attribute for all script-tags
                $outA = array();
                foreach ($tagAttrib[0] as $attrib_name => $attrib_value) {
                        // Set attributes: lowercase, always in quotes, with htmlspecialchars converted.
                    $outA[] = $attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value, 2).'"';
                }
                $newTag = '<'.trim($tagName.' '.implode(' ', $outA));
                    // All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
                if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col', $tagName) || substr($value, -2) == '/>') {
                    $newTag .= ' />';
                } else {
                    $newTag .= '>';
                }
                $value = $newTag;
            }
        }

        return $value;
    }

    /**
     * Processes content between tags.
     *
     * @param string  Content
     * @param integer Direction for bidir_htmlspecialchars(); 0 leaves the content untouched
     * @param array   Configuration (not used by this implementation)
     * @return string Processed content
     */
    function processContent($value, $dir, $conf) {
        if ($dir != 0) $value = $this->bidir_htmlspecialchars($value, $dir);
        return $value;
    }
}



if (defined('TYPO3_MODE') && !empty($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])) {
    include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
}