Documentation TYPO3 par Ameos |
00001 <?php 00002 /*************************************************************** 00003 * Copyright notice 00004 * 00005 * (c) 1999-2006 Kasper Skaarhoj (kasperYYYY@typo3.com) 00006 * All rights reserved 00007 * 00008 * This script is part of the TYPO3 project. The TYPO3 project is 00009 * free software; you can redistribute it and/or modify 00010 * it under the terms of the GNU General Public License as published by 00011 * the Free Software Foundation; either version 2 of the License, or 00012 * (at your option) any later version. 00013 * 00014 * The GNU General Public License can be found at 00015 * http://www.gnu.org/copyleft/gpl.html. 00016 * A copy is found in the textfile GPL.txt and important notices to the license 00017 * from the author is found in LICENSE.txt distributed with these scripts. 00018 * 00019 * 00020 * This script is distributed in the hope that it will be useful, 00021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00023 * GNU General Public License for more details. 00024 * 00025 * This copyright notice MUST APPEAR in all copies of the script! 00026 ***************************************************************/ 00106 class t3lib_parsehtml { 00107 var $caseShift_cache=array(); 00108 00109 00110 // *******************************************' 00111 // COPY FROM class.tslib_content.php: / BEGIN 00112 // substituteSubpart 00113 // Cleaned locally 2/2003 !!!! 
(so different from tslib_content version) 00114 // *******************************************' 00115 00123 function getSubpart($content, $marker) { 00124 $start = strpos($content, $marker); 00125 if ($start===false) { return ''; } 00126 $start += strlen($marker); 00127 $stop = strpos($content, $marker, $start); 00128 // Q: What shall get returned if no stop marker is given /*everything till the end*/ or nothing 00129 if ($stop===false) { return /*substr($content, $start)*/ ''; } 00130 $content = substr($content, $start, $stop-$start); 00131 $matches = array(); 00132 if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1) { 00133 return $matches[2]; 00134 } 00135 $matches = array(); 00136 if (preg_match('/(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1) { 00137 return $matches[1]; 00138 } 00139 $matches = array(); 00140 if (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $content, $matches)===1) { 00141 return $matches[2]; 00142 } 00143 return $content; 00144 } 00145 00156 function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) { 00157 $start = strpos($content, $marker); 00158 if ($start===false) { return $content; } 00159 $startAM = $start+strlen($marker); 00160 $stop = strpos($content, $marker, $startAM); 00161 if ($stop===false) { return $content; } 00162 $stopAM = $stop+strlen($marker); 00163 $before = substr($content, 0, $start); 00164 $after = substr($content, $stopAM); 00165 $between = substr($content, $startAM, $stop-$startAM); 00166 00167 if ($recursive) { 00168 $after = t3lib_parsehtml::substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker); 00169 } 00170 00171 if ($keepMarker) { 00172 $matches = array(); 00173 if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) { 00174 $before .= $marker.$matches[1]; 00175 $between = $matches[2]; 00176 $after = $matches[3].$marker.$after; 00177 } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) 
{ 00178 $before .= $marker; 00179 $between = $matches[1]; 00180 $after = $matches[2].$marker.$after; 00181 } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1) { 00182 $before .= $marker.$matches[1]; 00183 $between = $matches[2]; 00184 $after = $marker.$after; 00185 } else { 00186 $before .= $marker; 00187 $after = $marker.$after; 00188 } 00189 } else { 00190 $matches = array(); 00191 if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $before, $matches)===1) { 00192 $before = $matches[1]; 00193 } 00194 if (is_array($subpartContent)) { 00195 $matches = array(); 00196 if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) { 00197 $between = $matches[2]; 00198 } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1) { 00199 $between = $matches[1]; 00200 } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1) { 00201 $between = $matches[2]; 00202 } 00203 } 00204 $matches = array(); 00205 if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $after, $matches)===1) { 00206 $after = $matches[1]; 00207 } 00208 } 00209 00210 if (is_array($subpartContent)) { 00211 $between = $subpartContent[0].$between.$subpartContent[1]; 00212 } else { 00213 $between = $subpartContent; 00214 } 00215 00216 return $before.$between.$after; 00217 } 00218 00219 00220 // *******************************************' 00221 // COPY FROM class.tslib_content.php: / END 00222 // *******************************************' 00223 00224 00225 00226 00227 00228 00229 00230 /************************************ 00231 * 00232 * Parsing HTML code 00233 * 00234 ************************************/ 00235 00247 function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) { 00248 $tags=array_unique(t3lib_div::trimExplode(',',$tag,1)); 00249 $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si'; 00250 00251 $parts = preg_split($regexStr, $content); 00252 00253 $newParts=array(); 00254 $pointer=strlen($parts[0]); 00255 $buffer=$parts[0]; 
00256 $nested=0; 00257 reset($parts); 00258 next($parts); 00259 while(list($k,$v)=each($parts)) { 00260 $isEndTag= substr($content,$pointer,2)=='</' ? 1 : 0; 00261 $tagLen = strcspn(substr($content,$pointer),'>')+1; 00262 00263 if (!$isEndTag) { // We meet a start-tag: 00264 if (!$nested) { // Ground level: 00265 $newParts[]=$buffer; // previous buffer stored 00266 $buffer=''; 00267 } 00268 $nested++; // We are inside now! 00269 $mbuffer=substr($content,$pointer,strlen($v)+$tagLen); // New buffer set and pointer increased 00270 $pointer+=strlen($mbuffer); 00271 $buffer.=$mbuffer; 00272 } else { // If we meet an endtag: 00273 $nested--; // decrease nested-level 00274 $eliminated=0; 00275 if ($eliminateExtraEndTags && $nested<0) { 00276 $nested=0; 00277 $eliminated=1; 00278 } else { 00279 $buffer.=substr($content,$pointer,$tagLen); // In any case, add the endtag to current buffer and increase pointer 00280 } 00281 $pointer+=$tagLen; 00282 if (!$nested && !$eliminated) { // if we're back on ground level, (and not by eliminating tags... 
00283 $newParts[]=$buffer; 00284 $buffer=''; 00285 } 00286 $mbuffer=substr($content,$pointer,strlen($v)); // New buffer set and pointer increased 00287 $pointer+=strlen($mbuffer); 00288 $buffer.=$mbuffer; 00289 } 00290 00291 } 00292 $newParts[]=$buffer; 00293 return $newParts; 00294 } 00295 00308 function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) { 00309 $parts = $this->splitIntoBlock($tag,$content,TRUE); 00310 foreach($parts as $k => $v) { 00311 if ($k%2) { 00312 $firstTagName = $this->getFirstTagName($v, TRUE); 00313 $tagsArray = array(); 00314 $tagsArray['tag_start'] = $this->getFirstTag($v); 00315 $tagsArray['tag_end'] = '</'.$firstTagName.'>'; 00316 $tagsArray['tag_name'] = strtolower($firstTagName); 00317 $tagsArray['add_level'] = 1; 00318 $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag,$this->removeFirstAndLastTag($v),$procObj,$callBackContent,$callBackTags,$level+$tagsArray['add_level']); 00319 00320 if ($callBackTags) $tagsArray = $procObj->$callBackTags($tagsArray,$level); 00321 00322 $parts[$k] = 00323 $tagsArray['tag_start']. 00324 $tagsArray['content']. 
00325 $tagsArray['tag_end']; 00326 } else { 00327 if ($callBackContent) $parts[$k] = $procObj->$callBackContent($parts[$k],$level); 00328 } 00329 } 00330 00331 return implode('',$parts); 00332 } 00333 00344 function splitTags($tag,$content) { 00345 $tags = t3lib_div::trimExplode(',',$tag,1); 00346 $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si'; 00347 $parts = preg_split($regexStr, $content); 00348 00349 $pointer = strlen($parts[0]); 00350 $newParts = array(); 00351 $newParts[] = $parts[0]; 00352 reset($parts); 00353 next($parts); 00354 while(list($k,$v)=each($parts)) { 00355 $tagLen = strcspn(substr($content,$pointer),'>')+1; 00356 00357 // Set tag: 00358 $tag = substr($content,$pointer,$tagLen); // New buffer set and pointer increased 00359 $newParts[] = $tag; 00360 $pointer+= strlen($tag); 00361 00362 // Set content: 00363 $newParts[] = $v; 00364 $pointer+= strlen($v); 00365 } 00366 return $newParts; 00367 } 00368 00378 function getAllParts($parts,$tag_parts=1,$include_tag=1) { 00379 $newParts=array(); 00380 foreach ($parts as $k => $v) { 00381 if (($k+($tag_parts?0:1))%2) { 00382 if (!$include_tag) $v=$this->removeFirstAndLastTag($v); 00383 $newParts[]=$v; 00384 } 00385 } 00386 return $newParts; 00387 } 00388 00396 function removeFirstAndLastTag($str) { 00397 // End of first tag: 00398 $start = strpos($str,'>'); 00399 // Begin of last tag: 00400 $end = strrpos($str,'<'); 00401 // return 00402 return substr($str, $start+1, $end-$start-1); 00403 } 00404 00412 function getFirstTag($str) { 00413 // First: 00414 $endLen = strpos($str,'>')+1; 00415 return substr($str,0,$endLen); 00416 } 00417 00426 function getFirstTagName($str,$preserveCase=FALSE) { 00427 $matches = array(); 00428 if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $matches)===1) { 00429 if (!$preserveCase) { 00430 return strtoupper($matches[1]); 00431 } 00432 return $matches[1]; 00433 } 00434 return ''; 00435 } 00436 00445 function get_tag_attributes($tag,$deHSC=0) { 00446 
list($components,$metaC) = $this->split_tag_attributes($tag); 00447 $name = ''; // attribute name is stored here 00448 $valuemode = false; 00449 $attributes = array(); 00450 $attributesMeta = array(); 00451 if (is_array($components)) { 00452 foreach ($components as $key => $val) { 00453 if ($val != '=') { // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value 00454 if ($valuemode) { 00455 if ($name) { 00456 $attributes[$name] = $deHSC?t3lib_div::htmlspecialchars_decode($val):$val; 00457 $attributesMeta[$name]['dashType']=$metaC[$key]; 00458 $name = ''; 00459 } 00460 } else { 00461 if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val)) { 00462 $name = strtolower($namekey); 00463 $attributesMeta[$name]=array(); 00464 $attributesMeta[$name]['origTag']=$namekey; 00465 $attributes[$name] = ''; 00466 } 00467 } 00468 $valuemode = false; 00469 } else { 00470 $valuemode = true; 00471 } 00472 } 00473 return array($attributes,$attributesMeta); 00474 } 00475 } 00476 00486 function split_tag_attributes($tag) { 00487 $matches = array(); 00488 if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $matches)!==1) { 00489 return array(array(), array()); 00490 } 00491 $tag_tmp = $matches[2]; 00492 00493 $metaValue = array(); 00494 $value = array(); 00495 $matches = array(); 00496 if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0) { 00497 foreach ($matches[1] as $part) { 00498 $firstChar = substr($part, 0, 1); 00499 if ($firstChar=='"' || $firstChar=="'") { 00500 $metaValue[] = $firstChar; 00501 $value[] = substr($part, 1, -1); 00502 } else { 00503 $metaValue[] = ''; 00504 $value[] = $part; 00505 } 00506 } 00507 } 00508 return array($value,$metaValue); 00509 } 00510 00524 function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', 
$soloTags='br,hr,img,input,area') { 00525 $content = strtolower($content); 00526 $analyzedOutput=array(); 00527 $analyzedOutput['counts']=array(); // Counts appearances of start-tags 00528 $analyzedOutput['errors']=array(); // Lists ERRORS 00529 $analyzedOutput['warnings']=array(); // Lists warnings. 00530 $analyzedOutput['blocks']=array(); // Lists stats for block-tags 00531 $analyzedOutput['solo']=array(); // Lists stats for solo-tags 00532 00533 // Block tags, must have endings... 00534 $blockTags = explode(',',$blockTags); 00535 foreach($blockTags as $tagName) { 00536 $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1; 00537 $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1; 00538 $analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd); 00539 if ($countBegin) $analyzedOutput['counts'][$tagName]=$countBegin; 00540 if ($countBegin-$countEnd) { 00541 if ($countBegin-$countEnd > 0) { 00542 $analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!'; 00543 } else { 00544 $analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.'; 00545 } 00546 } 00547 } 00548 00549 // Solo tags, must NOT have endings... 00550 $soloTags = explode(',',$soloTags); 00551 foreach($soloTags as $tagName) { 00552 $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1; 00553 $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1; 00554 $analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd); 00555 if ($countBegin) $analyzedOutput['counts'][$tagName]=$countBegin; 00556 if ($countEnd) { 00557 $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". 
This was not expected (although XHTML technically allows it).'; 00558 } 00559 } 00560 00561 return $analyzedOutput; 00562 } 00563 00564 00565 00566 00567 00568 00569 00570 00571 00572 00573 00574 00575 /********************************* 00576 * 00577 * Clean HTML code 00578 * 00579 *********************************/ 00580 00617 function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) { 00618 $newContent = array(); 00619 $tokArr = explode('<',$content); 00620 $newContent[] = $this->processContent(current($tokArr),$hSC,$addConfig); 00621 next($tokArr); 00622 00623 $c = 1; 00624 $tagRegister = array(); 00625 $tagStack = array(); 00626 while(list(,$tok)=each($tokArr)) { 00627 $firstChar = substr($tok,0,1); 00628 # if (strcmp(trim($firstChar),'')) { // It is a tag... 00629 if (preg_match('/[[:alnum:]\/]/',$firstChar)==1) { // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..> 00630 $tagEnd = strpos($tok,'>'); 00631 if ($tagEnd) { // If there is and end-bracket... tagEnd can't be 0 as the first character can't be a > 00632 $endTag = $firstChar=='/' ? 1 : 0; 00633 $tagContent = substr($tok,$endTag,$tagEnd-$endTag); 00634 $tagParts = preg_split('/\s+/s',$tagContent,2); 00635 $tagName = strtolower($tagParts[0]); 00636 if (isset($tags[$tagName])) { 00637 if (is_array($tags[$tagName])) { // If there is processing to do for the tag: 00638 00639 if (!$endTag) { // If NOT an endtag, do attribute processing (added dec. 
2003) 00640 // Override attributes 00641 if (strcmp($tags[$tagName]['overrideAttribs'],'')) { 00642 $tagParts[1]=$tags[$tagName]['overrideAttribs']; 00643 } 00644 00645 // Allowed tags 00646 if (strcmp($tags[$tagName]['allowedAttribs'],'')) { 00647 if (!strcmp($tags[$tagName]['allowedAttribs'],'0')) { // No attribs allowed 00648 $tagParts[1]=''; 00649 } elseif (trim($tagParts[1])) { 00650 $tagAttrib = $this->get_tag_attributes($tagParts[1]); 00651 $tagParts[1]=''; 00652 $newTagAttrib = array(); 00653 if (!($tList = $tags[$tagName]['_allowedAttribs'])) { 00654 // Just explode attribts for tag once 00655 $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1); 00656 } 00657 foreach ($tList as $allowTag) { 00658 if (isset($tagAttrib[0][$allowTag])) $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag]; 00659 } 00660 $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]); 00661 } 00662 } 00663 00664 // Fixed attrib values 00665 if (is_array($tags[$tagName]['fixAttrib'])) { 00666 $tagAttrib = $this->get_tag_attributes($tagParts[1]); 00667 $tagParts[1]=''; 00668 reset($tags[$tagName]['fixAttrib']); 00669 while(list($attr,$params)=each($tags[$tagName]['fixAttrib'])) { 00670 if (strlen($params['set'])) $tagAttrib[0][$attr] = $params['set']; 00671 if (strlen($params['unset'])) unset($tagAttrib[0][$attr]); 00672 if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr])) $tagAttrib[0][$attr]=$params['default']; 00673 if ($params['always'] || isset($tagAttrib[0][$attr])) { 00674 if ($params['trim']) {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);} 00675 if ($params['intval']) {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);} 00676 if ($params['lower']) {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);} 00677 if ($params['upper']) {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);} 00678 if ($params['range']) { 00679 if (isset($params['range'][1])) { 00680 
$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1])); 00681 } else { 00682 $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0])); 00683 } 00684 } 00685 if (is_array($params['list'])) { 00686 if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName))) $tagAttrib[0][$attr]=$params['list'][0]; 00687 } 00688 if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) { 00689 unset($tagAttrib[0][$attr]); 00690 } 00691 if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) { 00692 unset($tagAttrib[0][$attr]); 00693 } 00694 if ($params['prefixLocalAnchors']) { 00695 if (substr($tagAttrib[0][$attr],0,1)=='#') { 00696 $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL'); 00697 $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr]; 00698 if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) { 00699 $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL'))); 00700 } 00701 } 00702 } 00703 if ($params['prefixRelPathWith']) { 00704 $urlParts = parse_url($tagAttrib[0][$attr]); 00705 if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') { // If it is NOT an absolute URL (by http: or starting "/") 00706 $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr]; 00707 } 00708 } 00709 if ($params['userFunc']) { 00710 $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this); 00711 } 00712 } 00713 } 00714 $tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]); 
00715 } 00716 } else { // If endTag, remove any possible attributes: 00717 $tagParts[1]=''; 00718 } 00719 00720 // Protecting the tag by converting < and > to < and > ?? 00721 if ($tags[$tagName]['protect']) { 00722 $lt = '<'; $gt = '>'; 00723 } else { 00724 $lt = '<'; $gt = '>'; 00725 } 00726 // Remapping tag name? 00727 if ($tags[$tagName]['remap']) $tagParts[0] = $tags[$tagName]['remap']; 00728 00729 // rmTagIfNoAttrib 00730 if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib']) { 00731 $setTag=1; 00732 00733 if ($tags[$tagName]['nesting']) { 00734 if (!is_array($tagRegister[$tagName])) $tagRegister[$tagName]=array(); 00735 00736 if ($endTag) { 00737 /* if ($tags[$tagName]['nesting']=='global') { 00738 $lastEl = end($tagStack); 00739 $correctTag = !strcmp($tagName,$lastEl); 00740 } else $correctTag=1; 00741 */ 00742 $correctTag=1; 00743 if ($tags[$tagName]['nesting']=='global') { 00744 $lastEl = end($tagStack); 00745 if (strcmp($tagName,$lastEl)) { 00746 if (in_array($tagName,$tagStack)) { 00747 while(count($tagStack) && strcmp($tagName,$lastEl)) { 00748 $elPos = end($tagRegister[$lastEl]); 00749 unset($newContent[$elPos]); 00750 00751 array_pop($tagRegister[$lastEl]); 00752 array_pop($tagStack); 00753 $lastEl = end($tagStack); 00754 } 00755 } else { 00756 $correctTag=0; // In this case the 00757 } 00758 } 00759 } 00760 if (!count($tagRegister[$tagName]) || !$correctTag) { 00761 $setTag=0; 00762 } else { 00763 array_pop($tagRegister[$tagName]); 00764 if ($tags[$tagName]['nesting']=='global') {array_pop($tagStack);} 00765 } 00766 } else { 00767 array_push($tagRegister[$tagName],$c); 00768 if ($tags[$tagName]['nesting']=='global') {array_push($tagStack,$tagName);} 00769 } 00770 } 00771 00772 if ($setTag) { 00773 // Setting the tag 00774 $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' 
'.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='<'); 00775 } 00776 } 00777 } else { 00778 $newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag); 00779 } 00780 } elseif ($keepAll) { // This is if the tag was not defined in the array for processing: 00781 if (!strcmp($keepAll,'protect')) { 00782 $lt = '<'; $gt = '>'; 00783 } else { 00784 $lt = '<'; $gt = '>'; 00785 } 00786 $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='<'); 00787 } 00788 $newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig); 00789 } else { 00790 $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig); // There were not end-bracket, so no tag... 00791 } 00792 } else { 00793 $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig); // It was not a tag anyways 00794 } 00795 } 00796 00797 // Unsetting tags: 00798 foreach ($tagRegister as $tag => $positions) { 00799 foreach ($positions as $pKey) { 00800 unset($newContent[$pKey]); 00801 } 00802 } 00803 00804 return implode('',$newContent); 00805 } 00806 00814 function bidir_htmlspecialchars($value,$dir) { 00815 if ($dir==1) { 00816 $value = htmlspecialchars($value); 00817 } elseif ($dir==2) { 00818 $value = t3lib_div::deHSCentities(htmlspecialchars($value)); 00819 } elseif ($dir==-1) { 00820 $value = str_replace('>','>',$value); 00821 $value = str_replace('<','<',$value); 00822 $value = str_replace('"','"',$value); 00823 $value = str_replace('&','&',$value); 00824 } 00825 return $value; 00826 } 00827 00837 function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='') { 00838 00839 $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content); 00840 foreach ($parts as $k => $v) { 00841 if ($k%2) { 00842 $params = $this->get_tag_attributes($v,1); 00843 $tagEnd = substr($v,-2)=='/>' ? ' />' : '>'; // Detect tag-ending so that it is re-applied correctly. 
00844 $firstTagName = $this->getFirstTagName($v); // The 'name' of the first tag 00845 $somethingDone=0; 00846 $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix; 00847 switch(strtolower($firstTagName)) { 00848 // background - attribute: 00849 case 'td': 00850 case 'body': 00851 case 'table': 00852 $src = $params[0]['background']; 00853 if ($src) { 00854 $params[0]['background'] = $this->prefixRelPath($prefix,$params[0]['background'],$suffix); 00855 $somethingDone=1; 00856 } 00857 break; 00858 // src attribute 00859 case 'img': 00860 case 'input': 00861 case 'script': 00862 case 'embed': 00863 $src = $params[0]['src']; 00864 if ($src) { 00865 $params[0]['src'] = $this->prefixRelPath($prefix,$params[0]['src'],$suffix); 00866 $somethingDone=1; 00867 } 00868 break; 00869 case 'link': 00870 case 'a': 00871 $src = $params[0]['href']; 00872 if ($src) { 00873 $params[0]['href'] = $this->prefixRelPath($prefix,$params[0]['href'],$suffix); 00874 $somethingDone=1; 00875 } 00876 break; 00877 // action attribute 00878 case 'form': 00879 $src = $params[0]['action']; 00880 if ($src) { 00881 $params[0]['action'] = $this->prefixRelPath($prefix,$params[0]['action'],$suffix); 00882 $somethingDone=1; 00883 } 00884 break; 00885 } 00886 if ($somethingDone) { 00887 $tagParts = preg_split('/\s+/s',$v,2); 00888 $tagParts[1]=$this->compileTagAttribs($params[0],$params[1]); 00889 $parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).$tagEnd; 00890 } 00891 } 00892 } 00893 $content = implode('',$parts); 00894 00895 // Fix <style> section: 00896 $prefix = isset($alternatives['style']) ? 
$alternatives['style'] : $main_prefix; 00897 if (strlen($prefix)) { 00898 $parts = $this->splitIntoBlock('style',$content); 00899 foreach($parts as $k => $v) { 00900 if ($k%2) { 00901 $parts[$k] = eregi_replace('(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))','\1'.$prefix.'\2'.$suffix.'\3',$parts[$k]); 00902 } 00903 } 00904 $content = implode('',$parts); 00905 } 00906 00907 return $content; 00908 } 00909 00919 function prefixRelPath($prefix,$srcVal,$suffix='') { 00920 $pU = parse_url($srcVal); 00921 if (!$pU['scheme'] && substr($srcVal, 0, 1)!='/') { // If not an absolute URL. 00922 $srcVal = $prefix.$srcVal.$suffix; 00923 } 00924 return $srcVal; 00925 } 00926 00937 function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) { 00938 $fontSplit = $this->splitIntoBlock('font',$value); // ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set! 00939 foreach ($fontSplit as $k => $v) { 00940 if ($k%2) { // font: 00941 $attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v)); 00942 $newAttribs=array(); 00943 if ($keepFace && $attribArray['face']) $newAttribs[]='face="'.$attribArray['face'].'"'; 00944 if ($keepSize && $attribArray['size']) $newAttribs[]='size="'.$attribArray['size'].'"'; 00945 if ($keepColor && $attribArray['color']) $newAttribs[]='color="'.$attribArray['color'].'"'; 00946 00947 $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor); 00948 if (count($newAttribs)) { 00949 $fontSplit[$k]='<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>'; 00950 } else { 00951 $fontSplit[$k]=$innerContent; 00952 } 00953 } 00954 } 00955 return implode('',$fontSplit); 00956 } 00957 00967 function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') { 00968 00969 foreach($tags as $from => $to) { 00970 $value = preg_replace('/'.preg_quote($ltChar).'(\/)?'.$from.'\s([^\>])*(\/)?\>/', $ltChar2.'$1'.$to.' 
$2$3>', $value); 00971 } 00972 return $value; 00973 } 00974 00982 function unprotectTags($content,$tagList='') { 00983 $tagsArray = t3lib_div::trimExplode(',',$tagList,1); 00984 $contentParts = explode('<',$content); 00985 next($contentParts); // bypass the first 00986 while(list($k,$tok)=each($contentParts)) { 00987 $firstChar = substr($tok,0,1); 00988 if (strcmp(trim($firstChar),'')) { 00989 $subparts = explode('>',$tok,2); 00990 $tagEnd = strlen($subparts[0]); 00991 if (strlen($tok)!=$tagEnd) { 00992 $endTag = $firstChar=='/' ? 1 : 0; 00993 $tagContent = substr($tok,$endTag,$tagEnd-$endTag); 00994 $tagParts = preg_split('/\s+/s',$tagContent,2); 00995 $tagName = strtolower($tagParts[0]); 00996 if (!strcmp($tagList,'') || in_array($tagName,$tagsArray)) { 00997 $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1]; 00998 } else $contentParts[$k] = '<'.$tok; 00999 } else $contentParts[$k] = '<'.$tok; 01000 } else $contentParts[$k] = '<'.$tok; 01001 } 01002 01003 return implode('',$contentParts); 01004 } 01005 01015 function stripTagsExcept($value,$tagList) { 01016 $tags=t3lib_div::trimExplode(',',$tagList,1); 01017 $forthArr=array(); 01018 $backArr=array(); 01019 foreach ($tags as $theTag) { 01020 $forthArr[$theTag]=md5($theTag); 01021 $backArr[md5($theTag)]=$theTag; 01022 } 01023 $value = $this->mapTags($value,$forthArr,'<','_'); 01024 $value=strip_tags($value); 01025 $value = $this->mapTags($value,$backArr,'_','<'); 01026 return $value; 01027 } 01028 01038 function caseShift($str,$flag,$cacheKey='') { 01039 $cacheKey .= $flag?1:0; 01040 if (is_array($str)) { 01041 if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey])) { 01042 reset($str); 01043 foreach ($str as $k => $v) { 01044 if (!$flag) { 01045 $str[$k] = strtoupper($v); 01046 } 01047 } 01048 if ($cacheKey) $this->caseShift_cache[$cacheKey]=$str; 01049 } else { 01050 $str = $this->caseShift_cache[$cacheKey]; 01051 } 01052 } elseif (!$flag) { $str = strtoupper($str); } 01053 return $str; 01054 } 01055 
01065 function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) { 01066 $accu=array(); 01067 foreach ($tagAttrib as $k =>$v) { 01068 if ($xhtmlClean) { 01069 $attr=strtolower($k); 01070 if (strcmp($v,'') || isset($meta[$k]['dashType'])) { 01071 $attr.='="'.htmlspecialchars($v).'"'; 01072 } 01073 } else { 01074 $attr=$meta[$k]['origTag']?$meta[$k]['origTag']:$k; 01075 if (strcmp($v,'') || isset($meta[$k]['dashType'])) { 01076 $dash=$meta[$k]['dashType']?$meta[$k]['dashType']:(t3lib_div::testInt($v)?'':'"'); 01077 $attr.='='.$dash.$v.$dash; 01078 } 01079 } 01080 $accu[]=$attr; 01081 } 01082 return implode(' ',$accu); 01083 } 01084 01093 function get_tag_attributes_classic($tag,$deHSC=0) { 01094 $attr=$this->get_tag_attributes($tag,$deHSC); 01095 return is_array($attr[0])?$attr[0]:array(); 01096 } 01097 01106 function indentLines($content, $number=1, $indentChar="\t") { 01107 $preTab = str_pad('', $number*strlen($indentChar), $indentChar); 01108 $lines = explode(chr(10),str_replace(chr(13),'',$content)); 01109 foreach ($lines as $k => $v) { 01110 $lines[$k] = $preTab.$v; 01111 } 01112 return implode(chr(10), $lines); 01113 } 01114 01123 function HTMLparserConfig($TSconfig,$keepTags=array()) { 01124 // Allow tags (base list, merged with incoming array) 01125 $alTags = array_flip(t3lib_div::trimExplode(',',strtolower($TSconfig['allowTags']),1)); 01126 $keepTags = array_merge($alTags,$keepTags); 01127 01128 // Set config properties. 
01129 if (is_array($TSconfig['tags.'])) { 01130 reset($TSconfig['tags.']); 01131 while(list($key,$tagC)=each($TSconfig['tags.'])) { 01132 if (!is_array($tagC) && $key==strtolower($key)) { 01133 if (!strcmp($tagC,'0')) unset($keepTags[$key]); 01134 if (!strcmp($tagC,'1') && !isset($keepTags[$key])) $keepTags[$key]=1; 01135 } 01136 } 01137 01138 reset($TSconfig['tags.']); 01139 foreach ($TSconfig['tags.'] as $key => $tagC) { 01140 if (is_array($tagC) && $key==strtolower($key)) { 01141 $key=substr($key,0,-1); 01142 if (!is_array($keepTags[$key])) $keepTags[$key]=array(); 01143 if (is_array($tagC['fixAttrib.'])) { 01144 reset($tagC['fixAttrib.']); 01145 while(list($atName,$atConfig)=each($tagC['fixAttrib.'])) { 01146 if (is_array($atConfig)) { 01147 $atName=substr($atName,0,-1); 01148 if (!is_array($keepTags[$key]['fixAttrib'][$atName])) { 01149 $keepTags[$key]['fixAttrib'][$atName]=array(); 01150 } 01151 $keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig); // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble... 01152 if (strcmp($keepTags[$key]['fixAttrib'][$atName]['range'],'')) $keepTags[$key]['fixAttrib'][$atName]['range'] = t3lib_div::trimExplode(',',$keepTags[$key]['fixAttrib'][$atName]['range']); 01153 if (strcmp($keepTags[$key]['fixAttrib'][$atName]['list'],'')) $keepTags[$key]['fixAttrib'][$atName]['list'] = t3lib_div::trimExplode(',',$keepTags[$key]['fixAttrib'][$atName]['list']); 01154 } 01155 } 01156 } 01157 unset($tagC['fixAttrib.']); 01158 unset($tagC['fixAttrib']); 01159 $keepTags[$key] = array_merge($keepTags[$key],$tagC); // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble... 
01160 } 01161 } 01162 } 01163 // localNesting 01164 if ($TSconfig['localNesting']) { 01165 $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['localNesting']),1); 01166 while(list(,$tn)=each($lN)) { 01167 if (isset($keepTags[$tn])) { 01168 $keepTags[$tn]['nesting']=1; 01169 } 01170 } 01171 } 01172 if ($TSconfig['globalNesting']) { 01173 $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['globalNesting']),1); 01174 while(list(,$tn)=each($lN)) { 01175 if (isset($keepTags[$tn])) { 01176 if (!is_array($keepTags[$tn])) $keepTags[$tn]=array(); 01177 $keepTags[$tn]['nesting']='global'; 01178 } 01179 } 01180 } 01181 if ($TSconfig['rmTagIfNoAttrib']) { 01182 $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['rmTagIfNoAttrib']),1); 01183 while(list(,$tn)=each($lN)) { 01184 if (isset($keepTags[$tn])) { 01185 if (!is_array($keepTags[$tn])) $keepTags[$tn]=array(); 01186 $keepTags[$tn]['rmTagIfNoAttrib']=1; 01187 } 01188 } 01189 } 01190 if ($TSconfig['noAttrib']) { 01191 $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['noAttrib']),1); 01192 while(list(,$tn)=each($lN)) { 01193 if (isset($keepTags[$tn])) { 01194 if (!is_array($keepTags[$tn])) $keepTags[$tn]=array(); 01195 $keepTags[$tn]['allowedAttribs']=0; 01196 } 01197 } 01198 } 01199 if ($TSconfig['removeTags']) { 01200 $lN = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1); 01201 while(list(,$tn)=each($lN)) { 01202 $keepTags[$tn]=array(); 01203 $keepTags[$tn]['allowedAttribs']=0; 01204 $keepTags[$tn]['rmTagIfNoAttrib']=1; 01205 } 01206 } 01207 01208 // Create additional configuration: 01209 $addConfig=array(); 01210 if ($TSconfig['xhtml_cleaning']) { 01211 $addConfig['xhtml']=1; 01212 } 01213 01214 return array( 01215 $keepTags, 01216 ''.$TSconfig['keepNonMatchedTags'], 01217 intval($TSconfig['htmlSpecialChars']), 01218 $addConfig 01219 ); 01220 } 01221 01247 function XHTML_clean($content) { 01248 $content = $this->HTMLcleaner( 01249 $content, 01250 array(), // No tags treated specially 01251 
1, // Keep ALL tags.
			0, // All content is htmlspecialchar()'ed (or ??) - if we do, <script> content will break...
			array('xhtml' => 1)
		);
		return $content;
	}

	/**
	 * Processes a single tag. The only processing implemented here is the (experimental)
	 * XHTML clean-up, which is applied when $conf['xhtml'] is set:
	 * - end tags are lowercased as a whole,
	 * - comments ("<!--") are returned untouched,
	 * - start tags are rebuilt with a lowercased tag name and every attribute written as name="value",
	 * - <img> gets an empty "alt" and <script> gets type="text/javascript" if those attributes are missing,
	 * - standalone tags (and tags that already ended with "/>") are closed with " />".
	 *
	 * @param	string		The tag as found in the content, including the surrounding "<" and ">"
	 * @param	array		Processing configuration; only the key "xhtml" is read here
	 * @param	boolean		If set, $value is an end tag
	 * @param	boolean		If set, the tag is "protected" and returned unchanged
	 * @return	string		The processed tag
	 */
	function processTag($value,$conf,$endTag,$protected=0)	{
			// Return immediately if protected or no parameters.
			// empty() is used instead of count() so that a NULL/non-array $conf does not raise an error.
		if ($protected || empty($conf))	return $value;

			// Nothing but XHTML processing is implemented (STILL VERY EXPERIMENTAL!!)
		if (empty($conf['xhtml']))	return $value;

			// Endtags are just set lowercase right away
		if ($endTag)	return strtolower($value);

			// ... and comments are ignored.
		if (substr($value,0,4)==='<!--')	return $value;

			// Finding inner value without < > (or without the trailing "/>" of a self-closed tag)
		$selfClosed = (substr($value,-2)==='/>');
		$inValue = substr($value,1,($selfClosed ? -2 : -1));

			// Separate tagname and attributes. A tag without attributes yields one element only,
			// so the result is padded to avoid reading an undefined offset.
		$parts = preg_split('/\s+/s',$inValue,2);
		if (!is_array($parts))	$parts = array($inValue);
		$parts = array_pad($parts,2,'');
		$tagName = strtolower($parts[0]);
		$tagP = $parts[1];

			// Process attributes. Only element 0 of the result is used; it is treated as an empty
			// list if get_tag_attributes() did not deliver an array there.
		$tagAttrib = $this->get_tag_attributes($tagP);
		$attributes = (is_array($tagAttrib) && isset($tagAttrib[0]) && is_array($tagAttrib[0])) ? $tagAttrib[0] : array();
		if ($tagName==='img' && !isset($attributes['alt']))	$attributes['alt']='';	// Set alt attribute for all images (not XHTML though...)
		if ($tagName==='script' && !isset($attributes['type']))	$attributes['type']='text/javascript';	// Set type attribute for all script-tags

			// Set attributes: always in quotes, value passed through bidir_htmlspecialchars() with direction 2.
			// NOTE(review): attribute names are emitted as delivered by get_tag_attributes() - presumably lowercase, confirm there.
		$outA = array();
		foreach ($attributes as $attrib_name => $attrib_value)	{
			$outA[] = $attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
		}
		$newTag = '<'.trim($tagName.' '.implode(' ',$outA));

			// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
		if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed)	{
			$newTag.=' />';
		} else {
			$newTag.='>';
		}

		return $newTag;
	}

	/**
	 * Processes content between tags: if $dir is non-zero the content is passed through
	 * bidir_htmlspecialchars() with that direction, otherwise it is returned unchanged.
	 *
	 * @param	string		The content to process
	 * @param	integer		Direction value handed on to bidir_htmlspecialchars(); 0 means "no conversion"
	 * @param	mixed		Not used by this implementation
	 * @return	string		The processed content
	 */
	function processContent($value,$dir,$conf)	{
		if ($dir==0)	return $value;
		return $this->bidir_htmlspecialchars($value,$dir);
	}
}


	// Include the extension class file if one is registered for this script in the current TYPO3_MODE:
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])	{
	include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
}
?>