00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00106 class t3lib_parsehtml {
00107 var $caseShift_cache=array();
00108
00109
00110
00111
00112
00113
00114
00115
/**
 * Returns the first subpart enclosed by two occurrences of $marker.
 *
 * The text between the first and the second occurrence of the marker is extracted.
 * If that text starts with the tail of an HTML comment ("... -->") and/or ends with
 * the head of one ("<!-- ..."), those comment remains are stripped from the result.
 *
 * @param	string		Content in which to look for the subpart
 * @param	string		Marker string delimiting the subpart (eg. "###CONTENT_PART###")
 * @return	string		The subpart found; empty string if the marker does not occur twice
 */
function getSubpart($content, $marker) {
	$firstPos = strpos($content, $marker);
	if ($firstPos === false) {
		return '';
	}

	$innerPos = $firstPos + strlen($marker);
	$secondPos = strpos($content, $marker, $innerPos);
	if ($secondPos === false) {
		return '';
	}

	$inner = substr($content, $innerPos, $secondPos - $innerPos);

	// Each entry: pattern to try and the capture group holding the cleaned subpart.
	// Order matters: comment remains on both sides, then only trailing, then only leading.
	$attempts = array(
		array('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', 2),
		array('/(.*)(\<\!\-\-[^\>]*)$/s', 1),
		array('/^([^\<]*\-\-\>)(.*)$/s', 2),
	);
	foreach ($attempts as $attempt) {
		$found = array();
		if (preg_match($attempt[0], $inner, $found) === 1) {
			return $found[$attempt[1]];
		}
	}

	// No comment remains around the subpart.
	return $inner;
}
00145
/**
 * Substitutes a subpart (the text between two occurrences of $marker) in $content.
 *
 * @param	string		Content containing the subpart
 * @param	string		Marker string delimiting the subpart
 * @param	mixed		Replacement string. If an array, the existing subpart is kept and wrapped between element 0 and element 1.
 * @param	boolean		If set, the remainder after the subpart is processed again so further occurrences are substituted as well
 * @param	boolean		If set, the markers (and the HTML comment remains around them) are kept around the substituted content
 * @return	string		Processed content; the input is returned unchanged if the marker does not occur twice
 */
function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
	$firstPos = strpos($content, $marker);
	if ($firstPos === false) {
		return $content;
	}
	$markerLength = strlen($marker);
	$innerPos = $firstPos + $markerLength;
	$secondPos = strpos($content, $marker, $innerPos);
	if ($secondPos === false) {
		return $content;
	}

	// Split into: text before first marker, text between markers, text after second marker
	$head = substr($content, 0, $firstPos);
	$tail = substr($content, $secondPos + $markerLength);
	$body = substr($content, $innerPos, $secondPos - $innerPos);

	if ($recursive) {
		$tail = t3lib_parsehtml::substituteSubpart($tail, $marker, $subpartContent, $recursive, $keepMarker);
	}

	$wrapExisting = is_array($subpartContent);
	$hit = array();

	if ($keepMarker) {
		// Markers stay; comment remains found inside the subpart are moved out next to their marker.
		if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit)===1) {
			$head .= $marker.$hit[1];
			$body = $hit[2];
			$tail = $hit[3].$marker.$tail;
		} elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit)===1) {
			$head .= $marker;
			$body = $hit[1];
			$tail = $hit[2].$marker.$tail;
		} elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $body, $hit)===1) {
			$head .= $marker.$hit[1];
			$body = $hit[2];
			$tail = $marker.$tail;
		} else {
			$head .= $marker;
			$tail = $marker.$tail;
		}
	} else {
		// Markers go away together with the HTML comments that wrapped them.
		if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $head, $hit)===1) {
			$head = $hit[1];
		}
		if ($wrapExisting) {
			// The existing subpart is reused below, so clean the comment remains off it.
			if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit)===1) {
				$body = $hit[2];
			} elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $body, $hit)===1) {
				$body = $hit[1];
			} elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $body, $hit)===1) {
				$body = $hit[2];
			}
		}
		if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $tail, $hit)===1) {
			$tail = $hit[1];
		}
	}

	$replacement = $wrapExisting
		? $subpartContent[0].$body.$subpartContent[1]
		: $subpartContent;

	return $head.$replacement.$tail;
}
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
/**
 * Splits $content into blocks delimited by the start/end tags named in $tag.
 *
 * For balanced markup the returned array alternates: even keys hold content outside the
 * tags, odd keys hold a complete block (start tag, inner content incl. nested blocks of
 * the same tags, end tag).
 *
 * @param	string		Comma list of tag names to split by (case-insensitive)
 * @param	string		HTML content to split
 * @param	boolean		If set, end tags without a matching start tag are dropped instead of being kept in the output
 * @return	array		Numeric array of content parts, see above
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
	$tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
	$regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';

	$parts = preg_split($regexStr, $content);

	$newParts = array();
	$pointer = strlen($parts[0]);	// Byte offset in $content of the tag that follows the current part
	$buffer = $parts[0];
	$nested = 0;

	// $parts[0] is the text before the first tag; every following part is preceded by one tag.
	// (each() was removed in PHP 8, hence the plain foreach over the remaining parts.)
	foreach (array_slice($parts, 1) as $v) {
		$isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
		$tagLen = strcspn(substr($content,$pointer),'>')+1;

		if (!$isEndTag) {
			// Start tag: at nesting level zero the content collected so far is flushed as an "outside" part
			if (!$nested) {
				$newParts[] = $buffer;
				$buffer = '';
			}
			$nested++;
			$mbuffer = substr($content,$pointer,strlen($v)+$tagLen);	// The tag plus the content following it
			$pointer += strlen($mbuffer);
			$buffer .= $mbuffer;
		} else {
			// End tag
			$nested--;
			$eliminated = 0;
			if ($eliminateExtraEndTags && $nested<0) {
				// Surplus end tag: skip it and reset the nesting level
				$nested = 0;
				$eliminated = 1;
			} else {
				$buffer .= substr($content,$pointer,$tagLen);
			}
			$pointer += $tagLen;
			if (!$nested && !$eliminated) {
				// Outermost block closed: flush it
				$newParts[] = $buffer;
				$buffer = '';
			}
			$mbuffer = substr($content,$pointer,strlen($v));	// Content following the end tag
			$pointer += strlen($mbuffer);
			$buffer .= $mbuffer;
		}
	}
	$newParts[] = $buffer;
	return $newParts;
}
00295
/**
 * Splits $content into blocks (see splitIntoBlock()) recursively and lets callback
 * methods on $procObj process the content outside blocks and the blocks themselves.
 *
 * The tag callback receives (and must return) an array with the keys "tag_start",
 * "tag_end", "tag_name", "add_level" and "content", plus the current level.
 *
 * @param	string		Comma list of tag names to split by
 * @param	string		HTML content
 * @param	object		Object whose callback methods are called (passed by reference)
 * @param	string		Name of the method on $procObj called for content outside blocks: method($content, $level). Skipped if empty.
 * @param	string		Name of the method on $procObj called for each block: method($tagsArray, $level). Skipped if empty.
 * @param	integer		Current nesting level (internal for the recursion)
 * @return	string		Processed content
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) {
	$segments = $this->splitIntoBlock($tag,$content,TRUE);
	foreach ($segments as $idx => $segment) {
		if (!($idx%2)) {
			// Even index: content outside the blocks
			if ($callBackContent) {
				$segments[$idx] = $procObj->$callBackContent($segments[$idx],$level);
			}
			continue;
		}

		// Odd index: a block; take it apart, recurse into its content, put it together again
		$rawTagName = $this->getFirstTagName($segment, TRUE);
		$tagsArray = array(
			'tag_start' => $this->getFirstTag($segment),
			'tag_end' => '</'.$rawTagName.'>',
			'tag_name' => strtolower($rawTagName),
			'add_level' => 1
		);
		$tagsArray['content'] = $this->splitIntoBlockRecursiveProc(
			$tag,
			$this->removeFirstAndLastTag($segment),
			$procObj,
			$callBackContent,
			$callBackTags,
			$level+$tagsArray['add_level']
		);

		if ($callBackTags) {
			$tagsArray = $procObj->$callBackTags($tagsArray,$level);
		}

		$segments[$idx] = $tagsArray['tag_start'].$tagsArray['content'].$tagsArray['tag_end'];
	}

	return implode('',$segments);
}
00333
/**
 * Splits $content by single tags (no end-tag matching) named in $tag.
 *
 * The returned array alternates: even keys hold the content between the tags,
 * odd keys hold the tags themselves.
 *
 * @param	string		Comma list of tag names to split by (case-insensitive)
 * @param	string		HTML content to split
 * @return	array		Numeric array: content, tag, content, tag, ...
 */
function splitTags($tag,$content) {
	$tags = t3lib_div::trimExplode(',',$tag,1);
	$regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
	$parts = preg_split($regexStr, $content);

	$pointer = strlen($parts[0]);	// Byte offset in $content of the next tag
	$newParts = array();
	$newParts[] = $parts[0];

	// Every part after the first is preceded by exactly one matched tag.
	// (each() was removed in PHP 8, hence the plain foreach over the remaining parts.)
	foreach (array_slice($parts, 1) as $v) {
		// The tag itself: everything from the pointer up to and including the next ">"
		$tagLen = strcspn(substr($content,$pointer),'>')+1;
		$tagStr = substr($content,$pointer,$tagLen);
		$newParts[] = $tagStr;
		$pointer += strlen($tagStr);

		// The content following the tag
		$newParts[] = $v;
		$pointer += strlen($v);
	}
	return $newParts;
}
00368
/**
 * Picks every second element from an array as produced by splitIntoBlock()/splitTags().
 *
 * @param	array		Parts array (alternating content / tag parts)
 * @param	boolean		If set, the elements at odd keys are returned, otherwise those at even keys
 * @param	boolean		If not set, the first and last tag are removed from each returned element
 * @return	array		Numerically re-indexed array of the selected parts
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1) {
	// Shifting the key by one flips which half of the array is selected
	$shift = $tag_parts ? 0 : 1;

	$selected = array();
	foreach ($parts as $idx => $part) {
		if (!(($idx+$shift)%2)) {
			continue;
		}
		$selected[] = $include_tag ? $part : $this->removeFirstAndLastTag($part);
	}
	return $selected;
}
00388
/**
 * Returns what lies between the first ">" and the last "<" of $str, ie. the
 * string with its first and last tag removed.
 *
 * @param	string		String expected to start and end with a tag
 * @return	string		The content between the first and the last tag
 */
function removeFirstAndLastTag($str) {
	// Offset just behind the end of the first tag
	$innerStart = strpos($str,'>')+1;
	// Number of characters up to (not including) the start of the last tag
	$innerLength = strrpos($str,'<')-$innerStart;

	return substr($str, $innerStart, $innerLength);
}
00404
/**
 * Returns the start of $str up to and including the first ">".
 *
 * @param	string		String expected to begin with a tag
 * @return	string		Everything up to and including the first ">"
 */
function getFirstTag($str) {
	$closingBracketPos = strpos($str,'>');
	return substr($str, 0, $closingBracketPos+1);
}
00417
/**
 * Returns the name of the tag that $str begins with (leading whitespace is allowed).
 *
 * @param	string		String beginning with a tag
 * @param	boolean		If set, the name is returned as written; otherwise it is converted to upper case
 * @return	string		Tag name, or an empty string if $str does not begin with a tag
 */
function getFirstTagName($str,$preserveCase=FALSE) {
	$found = array();
	if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $found)!==1) {
		return '';
	}
	return $preserveCase ? $found[1] : strtoupper($found[1]);
}
00436
/**
 * Parses the attributes of a tag into an array.
 *
 * Attribute names are reduced to alphanumerics plus "_", ":" and "-" and used
 * lower-cased as keys.
 *
 * @param	string		Tag (or just its attribute part), see split_tag_attributes()
 * @param	boolean		If set, attribute values are passed through t3lib_div::htmlspecialchars_decode()
 * @return	array		array(0 => attributes [name => value], 1 => meta data [name => array('origTag' => name as written, 'dashType' => quote character of the value)])
 */
function get_tag_attributes($tag,$deHSC=0) {
	list($components,$metaC) = $this->split_tag_attributes($tag);
	$name = '';	// Attribute name currently waiting for a value
	$valuemode = false;	// True right after a "=" separator
	$attributes = array();
	$attributesMeta = array();

	if (is_array($components)) {
		foreach ($components as $key => $val) {
			$dashType = isset($metaC[$key]) ? $metaC[$key] : '';

			// Only an UNQUOTED "=" separates name and value. A quoted value that happens
			// to be "=" (eg. title="=") must be stored as a value, not treated as separator.
			if ($val === '=' && $dashType === '') {
				$valuemode = true;
				continue;
			}

			if ($valuemode) {
				if ($name) {
					$attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
					$attributesMeta[$name]['dashType'] = $dashType;
					$name = '';
				}
			} else {
				if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val)) {
					$name = strtolower($namekey);
					$attributesMeta[$name] = array();
					$attributesMeta[$name]['origTag'] = $namekey;
					$attributes[$name] = '';
				}
			}
			$valuemode = false;
		}
	}

	// Always return the two-element structure, also if nothing could be parsed.
	return array($attributes,$attributesMeta);
}
00476
/**
 * Splits the attribute part of a tag into its tokens: names, "=" signs and values.
 *
 * A leading "<tagname " and a trailing ">" are removed before tokenizing. Quotes around
 * values are removed and reported in the second returned array.
 *
 * @param	string		Tag or attribute string, eg. '<a href="x" target=_blank>' or 'href="x"'
 * @return	array		array(0 => list of tokens, 1 => list with the quote character (", ' or empty string) of each token)
 */
function split_tag_attributes($tag) {
	$outer = array();
	if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $outer)!==1) {
		return array(array(), array());
	}
	$attributeString = $outer[2];

	$tokens = array();
	$quoteChars = array();
	$found = array();
	if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $attributeString, $found)>0) {
		foreach ($found[1] as $token) {
			$lead = substr($token, 0, 1);
			$isQuoted = ($lead=='"' || $lead=="'");

			$quoteChars[] = $isQuoted ? $lead : '';
			// Quoted tokens are stored without their surrounding quotes
			$tokens[] = $isQuoted ? substr($token, 1, -1) : $token;
		}
	}
	return array($tokens,$quoteChars);
}
00510
/**
 * Counts start and end tags in $content and reports imbalances.
 *
 * The content is lower-cased before counting, so tag names must be given in lower case.
 *
 * @param	string		HTML content to analyze
 * @param	string		Comma list of tags that are expected to have matching start and end tags
 * @param	string		Comma list of tags that are expected to have no end tag
 * @return	array		Array with the keys "counts" (tag => number of start tags, only tags found), "errors" and "warnings" (tag => message), "blocks" (tag => array(start, end, difference)) and "solo" (tag => array(start, end))
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
	$content = strtolower($content);
	$analyzedOutput = array();
	$analyzedOutput['counts'] = array();
	$analyzedOutput['errors'] = array();
	$analyzedOutput['warnings'] = array();
	$analyzedOutput['blocks'] = array();
	$analyzedOutput['solo'] = array();
	$found = array();	// Match container required by preg_match_all(); only the count is used

	// Block tags: start and end tags must balance
	foreach (explode(',',$blockTags) as $tagName) {
		$quotedName = preg_quote($tagName, '/');
		$countBegin = (int)preg_match_all('/\<'.$quotedName.'(\s|\>)/s', $content, $found);
		$countEnd = (int)preg_match_all('/\<\/'.$quotedName.'(\s|\>)/s', $content, $found);
		$analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
		if ($countBegin) $analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countBegin-$countEnd) {
			if ($countBegin-$countEnd > 0) {
				$analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
			} else {
				$analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
			}
		}
	}

	// Solo tags: no end tags expected
	foreach (explode(',',$soloTags) as $tagName) {
		$quotedName = preg_quote($tagName, '/');
		// "\/?\>" also accepts the self-closed form without whitespace, eg. "<br/>",
		// which would otherwise not be counted at all.
		$countBegin = (int)preg_match_all('/\<'.$quotedName.'(\s|\/?\>)/s', $content, $found);
		$countEnd = (int)preg_match_all('/\<\/'.$quotedName.'(\s|\>)/s', $content, $found);
		$analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
		if ($countBegin) $analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countEnd) {
			$analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
		}
	}

	return $analyzedOutput;
}
00563
00564
00565
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578
00579
00580
/**
 * Cleans HTML content tag by tag according to a configuration array.
 *
 * $content is split at every "<". A token that starts with an alphanumeric character
 * or "/" and contains a ">" is treated as a tag, everything else as content.
 *
 * $tags is keyed by lower-case tag name. A non-array value keeps the tag as is. An array
 * value may contain these keys (as read by the code below):
 *   overrideAttribs    string that replaces the whole attribute part
 *   allowedAttribs     comma list of attributes to keep; "0" removes all attributes
 *   fixAttrib          array [attribute => rules]; rules read: set, unset, default, always,
 *                      trim, intval, lower, upper, range, list, casesensitiveComp,
 *                      removeIfFalse, removeIfEquals, prefixLocalAnchors,
 *                      prefixRelPathWith, userFunc
 *   protect            output the tag with "&lt;" / "&gt;" instead of "<" / ">"
 *   remap              new tag name
 *   rmTagIfNoAttrib    drop the start tag if it ends up without attributes
 *   nesting            if set, start and end tags must match up, otherwise they are
 *                      removed; the value "global" additionally enforces correct nesting
 *                      across all tags using "global"
 *
 * @param	string		HTML content
 * @param	array		Tag configuration, see above
 * @param	mixed		How to treat tags not found in $tags: false = remove, "protect" = keep with "&lt;" / "&gt;", any other true value = keep
 * @param	integer		Direction passed to processContent() for all non-tag content
 * @param	array		Additional configuration passed to processTag() / processContent() (eg. array('xhtml' => 1))
 * @return	string		Processed content
 */
function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) {
	$newContent = array();
	$tokArr = explode('<',$content);	// There is always at least one element

	// Everything before the first "<" is plain content
	$newContent[] = $this->processContent($tokArr[0],$hSC,$addConfig);

	$c = 1;	// Next index in $newContent
	$tagRegister = array();	// Per tag name: stack of $newContent indexes of start tags not closed yet
	$tagStack = array();	// Names of the open tags that use "global" nesting

	// each() was removed in PHP 8, hence the plain foreach over the remaining tokens
	foreach (array_slice($tokArr, 1) as $tok) {
		$firstChar = substr($tok,0,1);
		if (preg_match('/[[:alnum:]\/]/',$firstChar)==1) {	// It is a tag...
			$tagEnd = strpos($tok,'>');
			if ($tagEnd) {	// ... and it has an end bracket
				$endTag = $firstChar=='/' ? 1 : 0;
				$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
				$tagParts = preg_split('/\s+/s',$tagContent,2);
				if (!isset($tagParts[1])) {
					$tagParts[1] = '';	// Tag without attributes
				}
				$tagName = strtolower($tagParts[0]);
				if (isset($tags[$tagName])) {
					if (is_array($tags[$tagName])) {	// There is processing to do for the tag

						if (!$endTag) {	// Attributes only exist on start tags
								// Override attributes
							if (isset($tags[$tagName]['overrideAttribs']) && strcmp($tags[$tagName]['overrideAttribs'],'')) {
								$tagParts[1]=$tags[$tagName]['overrideAttribs'];
							}

								// Allowed attributes
							if (isset($tags[$tagName]['allowedAttribs']) && strcmp($tags[$tagName]['allowedAttribs'],'')) {
								if (!strcmp($tags[$tagName]['allowedAttribs'],'0')) {	// No attributes allowed at all
									$tagParts[1]='';
								} elseif (trim($tagParts[1])) {
									$tagAttrib = $this->get_tag_attributes($tagParts[1]);
									$tagParts[1]='';
									$newTagAttrib = array();
									if (empty($tags[$tagName]['_allowedAttribs'])) {
											// Cache the parsed list for the following tags of the same name
										$tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
									}
									$tList = $tags[$tagName]['_allowedAttribs'];
									foreach ($tList as $allowTag) {
										if (isset($tagAttrib[0][$allowTag])) $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
									}
									$tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
								}
							}

								// Fixed attribute values
							if (isset($tags[$tagName]['fixAttrib']) && is_array($tags[$tagName]['fixAttrib'])) {
								$tagAttrib = $this->get_tag_attributes($tagParts[1]);
								$tagParts[1]='';
								foreach ($tags[$tagName]['fixAttrib'] as $attr => $params) {
									if (strlen($params['set'])) $tagAttrib[0][$attr] = $params['set'];
									if (strlen($params['unset'])) unset($tagAttrib[0][$attr]);
									if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr])) $tagAttrib[0][$attr]=$params['default'];
									if ($params['always'] || isset($tagAttrib[0][$attr])) {
										if ($params['trim']) {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
										if ($params['intval']) {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
										if ($params['lower']) {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
										if ($params['upper']) {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
										if ($params['range']) {
											if (isset($params['range'][1])) {
												$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
											} else {
												$tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
											}
										}
										if (is_array($params['list'])) {
												// The cache key includes the attribute name: keyed by tag name alone, two
												// list-restricted attributes of one tag would share the first one's list.
											if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName.'.'.$attr))) $tagAttrib[0][$attr]=$params['list'][0];
										}
										if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) {
											unset($tagAttrib[0][$attr]);
										}
										if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) {
											unset($tagAttrib[0][$attr]);
										}
										if ($params['prefixLocalAnchors']) {
											if (substr($tagAttrib[0][$attr],0,1)=='#') {
												$prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
												$tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
												if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
													$tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
												}
											}
										}
										if ($params['prefixRelPathWith']) {
											$urlParts = parse_url($tagAttrib[0][$attr]);
											if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {
												$tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
											}
										}
										if ($params['userFunc']) {
											$tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
										}
									}
								}
								$tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
							}
						} else {	// End tags never carry attributes
							$tagParts[1]='';
						}

							// Protecting the tag by converting < and > to &lt; and &gt; ?
						if (!empty($tags[$tagName]['protect'])) {
							$lt = '&lt;'; $gt = '&gt;';
						} else {
							$lt = '<'; $gt = '>';
						}

							// Remapping the tag name?
						if (!empty($tags[$tagName]['remap'])) $tagParts[0] = $tags[$tagName]['remap'];

							// The tag is set if it is an end tag, has attributes or is not to be removed when without attributes
						if ($endTag || trim($tagParts[1]) || empty($tags[$tagName]['rmTagIfNoAttrib'])) {
							$setTag=1;

							if (!empty($tags[$tagName]['nesting'])) {
								if (!isset($tagRegister[$tagName]) || !is_array($tagRegister[$tagName])) $tagRegister[$tagName]=array();

								if ($endTag) {
									$correctTag=1;
									if ($tags[$tagName]['nesting']=='global') {
										$lastEl = end($tagStack);
										if (strcmp($tagName,$lastEl)) {
											if (in_array($tagName,$tagStack)) {
													// The tag was opened further out: remove the start tags of everything
													// opened after it which has not been closed
												while(count($tagStack) && strcmp($tagName,$lastEl)) {
													$elPos = end($tagRegister[$lastEl]);
													unset($newContent[$elPos]);

													array_pop($tagRegister[$lastEl]);
													array_pop($tagStack);
													$lastEl = end($tagStack);
												}
											} else {
												$correctTag=0;	// End tag of something never opened
											}
										}
									}
									if (!count($tagRegister[$tagName]) || !$correctTag) {
										$setTag=0;
									} else {
										array_pop($tagRegister[$tagName]);
										if ($tags[$tagName]['nesting']=='global') {array_pop($tagStack);}
									}
								} else {
									array_push($tagRegister[$tagName],$c);
									if ($tags[$tagName]['nesting']=='global') {array_push($tagStack,$tagName);}
								}
							}

							if ($setTag) {
									// A protected tag (written with &lt; / &gt;) is not processed further by processTag()
								$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
							}
						}
					} else {
						$newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
					}
				} elseif ($keepAll) {	// Tag is not configured, but all tags are kept
					if (!strcmp($keepAll,'protect')) {
						$lt = '&lt;'; $gt = '&gt;';
					} else {
						$lt = '<'; $gt = '>';
					}
					$newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
				}
				$newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
			} else {
				$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// No end bracket: the "<" was content
			}
		} else {
			$newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);	// Not a tag: the "<" was content
		}
	}

		// Remove the start tags which were never closed
	foreach ($tagRegister as $tag => $positions) {
		foreach ($positions as $pKey) {
			unset($newContent[$pKey]);
		}
	}

	return implode('',$newContent);
}
00806
/**
 * Converts HTML special characters to entities or back, depending on $dir.
 *
 * @param	string		Value to convert
 * @param	integer		Direction: 1 = htmlspecialchars(); 2 = htmlspecialchars() followed by t3lib_div::deHSCentities(); -1 = convert the entities &gt; &lt; &quot; &amp; back to characters; any other value leaves $value untouched
 * @return	string		Converted value
 */
function bidir_htmlspecialchars($value,$dir) {
	if ($dir==1) {
		$value = htmlspecialchars($value);
	} elseif ($dir==2) {
		$value = t3lib_div::deHSCentities(htmlspecialchars($value));
	} elseif ($dir==-1) {
		// "&amp;" is converted last so that eg. "&amp;lt;" ends up as "&lt;", not "<"
		$value = str_replace('&gt;','>',$value);
		$value = str_replace('&lt;','<',$value);
		$value = str_replace('&quot;','"',$value);
		$value = str_replace('&amp;','&',$value);
	}
	return $value;
}
00827
/**
 * Prefixes the relative resource paths found in HTML content.
 *
 * Handled are the "background" attribute of td/body/table, "src" of img/input/script/embed,
 * "href" of link/a and "action" of form (see prefixRelPath() for what counts as relative),
 * plus every url(...) value inside <style> blocks.
 *
 * @param	string		Prefix put in front of relative paths
 * @param	string		HTML content
 * @param	array		Alternative prefixes: key is the UPPER-case tag name (or "style" for style blocks), value the prefix to use instead of $main_prefix
 * @param	string		Suffix appended to each prefixed path
 * @return	string		Processed HTML content
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='') {
	// Lower-case tag name => attribute that holds the resource reference
	$resourceAttribs = array(
		'td' => 'background',
		'body' => 'background',
		'table' => 'background',
		'img' => 'src',
		'input' => 'src',
		'script' => 'src',
		'embed' => 'src',
		'link' => 'href',
		'a' => 'href',
		'form' => 'action'
	);

	$parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
	foreach ($parts as $k => $v) {
		if (!($k%2)) {
			continue;	// Even keys hold the content between the tags
		}
		$firstTagName = $this->getFirstTagName($v);	// Upper-case name of the tag
		$lcTagName = strtolower($firstTagName);
		if (!isset($resourceAttribs[$lcTagName])) {
			continue;
		}
		$attribName = $resourceAttribs[$lcTagName];

		$params = $this->get_tag_attributes($v,1);
		if (empty($params[0][$attribName])) {
			continue;	// Nothing to prefix: the tag is left exactly as it was
		}

		$prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
		$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';	// Detect tag-ending so that it is re-applied correctly.

		$params[0][$attribName] = $this->prefixRelPath($prefix,$params[0][$attribName],$suffix);
		$parts[$k] = '<'.trim($lcTagName.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
	}
	$content = implode('',$parts);

	// Fix <style> section:
	$prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
	if (strlen($prefix)) {
		// eregi_replace() was removed in PHP 7, so PCRE with the "i" modifier is used.
		// "${1}" keeps a prefix starting with a digit from being read as part of the group
		// number; "\" and "$" in prefix/suffix are escaped so they are inserted literally.
		$replacement = '${1}'.addcslashes($prefix, '\\$').'${2}'.addcslashes($suffix, '\\$').'${3}';
		$parts = $this->splitIntoBlock('style',$content);
		foreach ($parts as $k => $v) {
			if ($k%2) {
				$parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i', $replacement, $v);
			}
		}
		$content = implode('',$parts);
	}

	return $content;
}
00909
/**
 * Prefixes (and suffixes) $srcVal if it is a relative path, ie. if it has no
 * URL scheme and does not start with "/".
 *
 * @param	string		Prefix to put in front of a relative path
 * @param	string		Path or URL to check
 * @param	string		Suffix to append to a relative path
 * @return	string		The input value, prefixed and suffixed if it was relative
 */
function prefixRelPath($prefix,$srcVal,$suffix='') {
	// parse_url() returns false for seriously malformed input and leaves out the
	// "scheme" key if there is none; both cases count as "no scheme".
	$pU = parse_url($srcVal);
	$hasScheme = is_array($pU) && !empty($pU['scheme']);

	if (!$hasScheme && substr($srcVal, 0, 1)!='/') {
		$srcVal = $prefix.$srcVal.$suffix;
	}
	return $srcVal;
}
00926
/**
 * Cleans up <font> tags: only the face, size and color attributes that were asked for
 * (and are non-empty) survive; font tags left without attributes are removed while
 * their content is kept. Nested font tags are processed recursively.
 *
 * @param	string		HTML content
 * @param	boolean		If set, the "face" attribute is kept
 * @param	boolean		If set, the "size" attribute is kept
 * @param	boolean		If set, the "color" attribute is kept
 * @return	string		Processed HTML content
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) {
	// Attribute => "keep it?" flag, in the order the attributes are written out
	$keepFlags = array(
		'face' => $keepFace,
		'size' => $keepSize,
		'color' => $keepColor
	);

	$blocks = $this->splitIntoBlock('font',$value);
	foreach ($blocks as $idx => $block) {
		if (!($idx%2)) {
			continue;	// Content outside font tags stays as it is
		}

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($block));
		$newAttribs = array();
		foreach ($keepFlags as $attribName => $keepIt) {
			if ($keepIt && $attribArray[$attribName]) {
				$newAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
			}
		}

		$innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($block),$keepFace,$keepSize,$keepColor);
		$blocks[$idx] = count($newAttribs)
			? '<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>'
			: $innerContent;
	}
	return implode('',$blocks);
}
00957
/**
 * Renames tags: every start and end tag named by a key of $tags is rewritten with the
 * name given as value. Attributes are carried over unchanged. Matching is case-sensitive.
 *
 * @param	string		Content to process
 * @param	array		Tag map: key = tag name to search for, value = new tag name
 * @param	string		Character(s) that open a tag in the input
 * @param	string		Character(s) written as tag opener in the output
 * @return	string		Processed content
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
	$openerPattern = preg_quote($ltChar, '/');
	// "\" and "$" have a meaning in preg_replace() replacement strings; escape them in literal parts
	$openerReplacement = addcslashes($ltChar2, '\\$');

	foreach ($tags as $from => $to) {
		// The attribute part "(\s[^>]*)?" is optional so that "<b>" and "</b>" match too, and
		// it is captured as a whole (a repeated single-character group keeps only its last
		// character). The tag name must be followed by whitespace, "/" or ">", so "b" does
		// not match "<br>".
		$pattern = '/'.$openerPattern.'(\/)?'.preg_quote((string)$from, '/').'(\s[^\>]*)?(\/)?\>/';
		// "${1}" instead of "$1": a new name starting with a digit (eg. an md5 hash) would
		// otherwise be read as part of the group number.
		$replacement = $openerReplacement.'${1}'.addcslashes((string)$to, '\\$').'${2}${3}>';
		$value = preg_replace($pattern, $replacement, $value);
	}
	return $value;
}
00974
/**
 * Converts protected tags (written as "&lt;tag ...&gt;") back into real tags.
 *
 * @param	string		Content with protected tags
 * @param	string		Comma list of lower-case tag names to unprotect; if empty, all protected tags are converted
 * @return	string		Content with the selected tags unprotected
 */
function unprotectTags($content,$tagList='') {
	$tagsArray = t3lib_div::trimExplode(',',$tagList,1);
	$contentParts = explode('&lt;',$content);

	// each() was removed in PHP 8, hence the plain foreach
	foreach ($contentParts as $k => $tok) {
		if ($k === 0) {
			continue;	// Text before the first "&lt;"
		}

		// Default: not a tag to unprotect, so the "&lt;" removed by explode() is put back
		$restored = '&lt;'.$tok;

		$firstChar = substr($tok,0,1);
		if (strcmp(trim($firstChar),'')) {	// "&lt;" followed by non-whitespace: may be a tag
			$subparts = explode('&gt;',$tok,2);
			if (count($subparts) == 2) {	// ... and it has an end bracket
				$endTag = $firstChar=='/' ? 1 : 0;
				$tagContent = substr($subparts[0],$endTag);
				$tagParts = preg_split('/\s+/s',$tagContent,2);
				$tagName = strtolower($tagParts[0]);
				if (!strcmp($tagList,'') || in_array($tagName,$tagsArray)) {
					$restored = '<'.$subparts[0].'>'.$subparts[1];
				}
			}
		}
		$contentParts[$k] = $restored;
	}

	return implode('',$contentParts);
}
01005
/**
 * Strips all tags from $value except those named in $tagList.
 *
 * The tags to keep are temporarily rewritten (via mapTags()) with "_" as opener and the
 * md5 hash of the tag name as name, so that strip_tags() does not see them as tags;
 * afterwards they are mapped back.
 *
 * @param	string		Content to strip tags from
 * @param	string		Comma list of tag names to keep
 * @return	string		Content with all other tags removed
 */
function stripTagsExcept($value,$tagList) {
	$nameToHash = array();
	$hashToName = array();
	foreach (t3lib_div::trimExplode(',',$tagList,1) as $tagName) {
		$hash = md5($tagName);
		$nameToHash[$tagName] = $hash;
		$hashToName[$hash] = $tagName;
	}

	$masked = $this->mapTags($value,$nameToHash,'<','_');
	$stripped = strip_tags($masked);
	return $this->mapTags($stripped,$hashToName,'_','<');
}
01028
/**
 * Prepares a string or an array of strings for comparison: unless $flag is set
 * (case-sensitive comparison wanted), everything is converted to upper case.
 *
 * @param	mixed		String or array of strings
 * @param	boolean		If set, the input is returned unchanged (case-sensitive comparison)
 * @param	string		Cache key for array input. If non-empty, the result is stored in / taken from $this->caseShift_cache under this key combined with the flag. Without a key nothing is cached.
 * @return	mixed		String or array, depending on the input
 */
function caseShift($str,$flag,$cacheKey='') {
	if (!is_array($str)) {
		return $flag ? $str : strtoupper($str);
	}

	// Decide on caching BEFORE the flag is appended: judging the combined key, an empty
	// key with a set flag ("1") would make unrelated callers share one cache entry.
	$useCache = strcmp((string)$cacheKey, '') != 0;
	$cacheKey .= $flag ? 1 : 0;

	if ($useCache && isset($this->caseShift_cache[$cacheKey])) {
		return $this->caseShift_cache[$cacheKey];
	}

	if (!$flag) {
		foreach ($str as $k => $v) {
			$str[$k] = strtoupper($v);
		}
	}
	if ($useCache) {
		$this->caseShift_cache[$cacheKey] = $str;
	}
	return $str;
}
01055
/**
 * Compiles an attribute array back into the attribute string of a tag.
 *
 * An attribute is written with a value if the value is non-empty or if a "dashType"
 * entry exists for it in $meta; otherwise only its name is written.
 *
 * @param	array		Attributes: name => value
 * @param	array		Meta data per attribute: 'origTag' (name as originally written) and 'dashType' (quote character originally used)
 * @param	boolean		If set, names are lower-cased and values always written in double quotes and passed through htmlspecialchars(). Otherwise the original name and quote character are used where known; without a known quote character, double quotes are used unless t3lib_div::testInt() accepts the value.
 * @return	string		Attributes joined by single spaces
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) {
	$compiled = array();
	foreach ($tagAttrib as $attribName => $attribValue) {
		// The name part
		$piece = $xhtmlClean
			? strtolower($attribName)
			: ($meta[$attribName]['origTag'] ? $meta[$attribName]['origTag'] : $attribName);

		// The value part, if any
		if (strcmp($attribValue,'') || isset($meta[$attribName]['dashType'])) {
			if ($xhtmlClean) {
				$piece .= '="'.htmlspecialchars($attribValue).'"';
			} else {
				if ($meta[$attribName]['dashType']) {
					$quote = $meta[$attribName]['dashType'];
				} else {
					$quote = t3lib_div::testInt($attribValue) ? '' : '"';
				}
				$piece .= '='.$quote.$attribValue.$quote;
			}
		}
		$compiled[] = $piece;
	}
	return implode(' ',$compiled);
}
01084
/**
 * Returns only the attribute array (name => value) of get_tag_attributes(),
 * without the meta data.
 *
 * @param	string		Tag or attribute string, see get_tag_attributes()
 * @param	boolean		Passed on to get_tag_attributes()
 * @return	array		Attributes: name => value; empty array if nothing was parsed
 */
function get_tag_attributes_classic($tag,$deHSC=0) {
	$parsed = $this->get_tag_attributes($tag,$deHSC);
	if (is_array($parsed[0])) {
		return $parsed[0];
	}
	return array();
}
01097
/**
 * Indents every line of $content. Carriage returns are removed, so the result
 * uses line feeds only.
 *
 * @param	string		Content to indent
 * @param	integer		How many times $indentChar is put in front of each line
 * @param	string		String used for one level of indentation
 * @return	string		Indented content
 */
function indentLines($content, $number=1, $indentChar="\t") {
	$lineFeed = chr(10);
	$indent = str_pad('', $number*strlen($indentChar), $indentChar);

	$lines = explode($lineFeed, str_replace(chr(13),'',$content));

	// Joining with "line feed + indent" indents all lines but the first, which gets its indent up front
	return $indent.implode($lineFeed.$indent, $lines);
}
01114
/**
 * Converts a TypoScript style configuration array into the arguments for HTMLcleaner().
 *
 * Keys of $TSconfig read here: allowTags, tags. (per tag: "0"/"1" or a sub array, incl.
 * fixAttrib.), localNesting, globalNesting, rmTagIfNoAttrib, noAttrib, removeTags,
 * xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars.
 *
 * @param	array		Configuration array
 * @param	array		Tag configuration to start out with; it is merged onto the "allowTags" list
 * @return	array		array(0 => tag configuration, 1 => keepNonMatchedTags value as string, 2 => htmlSpecialChars value as integer, 3 => additional configuration); in that order these are the arguments 2-5 of HTMLcleaner()
 */
function HTMLparserConfig($TSconfig,$keepTags=array()) {
	// Allow tags (base list, merged with incoming array)
	$allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
	$alTags = array_flip(t3lib_div::trimExplode(',',strtolower($allowTags),1));
	$keepTags = array_merge($alTags,$keepTags);

	// Set config properties.
	// (All loops below used each(), which was removed in PHP 8.)
	if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
		// First the plain on/off switches ...
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (!is_array($tagC) && $key==strtolower($key)) {
				if (!strcmp($tagC,'0')) unset($keepTags[$key]);
				if (!strcmp($tagC,'1') && !isset($keepTags[$key])) $keepTags[$key]=1;
			}
		}

		// ... then the detailed settings (keys with a trailing dot holding an array)
		foreach ($TSconfig['tags.'] as $key => $tagC) {
			if (is_array($tagC) && $key==strtolower($key)) {
				$key = substr($key,0,-1);	// Strip the trailing dot
				if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) $keepTags[$key]=array();
				if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
					foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
						if (is_array($atConfig)) {
							$atName = substr($atName,0,-1);	// Strip the trailing dot
							if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName])) {
								$keepTags[$key]['fixAttrib'][$atName]=array();
							}
							$keepTags[$key]['fixAttrib'][$atName] = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);	// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...

							// "range" and "list" are given as comma lists and are used as arrays by HTMLcleaner()
							foreach (array('range', 'list') as $listProperty) {
								if (isset($keepTags[$key]['fixAttrib'][$atName][$listProperty])
										&& !is_array($keepTags[$key]['fixAttrib'][$atName][$listProperty])
										&& strcmp($keepTags[$key]['fixAttrib'][$atName][$listProperty],'')) {
									$keepTags[$key]['fixAttrib'][$atName][$listProperty] = t3lib_div::trimExplode(',',$keepTags[$key]['fixAttrib'][$atName][$listProperty]);
								}
							}
						}
					}
				}
				unset($tagC['fixAttrib.']);
				unset($tagC['fixAttrib']);
				$keepTags[$key] = array_merge($keepTags[$key],$tagC);	// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
			}
		}
	}

	// Options that are comma lists of tag names and set one property on tags ALREADY kept.
	// Each entry: option name, property to set, value. The order is kept from the original
	// code: "globalNesting" comes after "localNesting" and overrules it for the same tag.
	$tagListOptions = array(
		array('localNesting', 'nesting', 1),
		array('globalNesting', 'nesting', 'global'),
		array('rmTagIfNoAttrib', 'rmTagIfNoAttrib', 1),
		array('noAttrib', 'allowedAttribs', 0)
	);
	foreach ($tagListOptions as $tagListOption) {
		list($optionName, $property, $propertyValue) = $tagListOption;
		if (empty($TSconfig[$optionName])) {
			continue;
		}
		foreach (t3lib_div::trimExplode(',',strtolower($TSconfig[$optionName]),1) as $tn) {
			if (isset($keepTags[$tn])) {
				// A tag kept via "allowTags" or "tags.x = 1" has a scalar configuration so far; it
				// has to become an array before a property can be set ("localNesting" missed this step).
				if (!is_array($keepTags[$tn])) $keepTags[$tn]=array();
				$keepTags[$tn][$property] = $propertyValue;
			}
		}
	}

	// removeTags: configured so that the start tag loses all attributes and is then dropped for having none
	if (!empty($TSconfig['removeTags'])) {
		foreach (t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1) as $tn) {
			$keepTags[$tn]=array();
			$keepTags[$tn]['allowedAttribs']=0;
			$keepTags[$tn]['rmTagIfNoAttrib']=1;
		}
	}

	// Create additional configuration:
	$addConfig=array();
	if (!empty($TSconfig['xhtml_cleaning'])) {
		$addConfig['xhtml']=1;
	}

	return array(
		$keepTags,
		''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
		intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
		$addConfig
	);
}
01221
/**
 * Runs $content through HTMLcleaner() with XHTML processing switched on: no tag is
 * configured specially, all tags are kept and the content between tags is not converted.
 *
 * @param	string		HTML content
 * @return	string		Processed content
 */
function XHTML_clean($content) {
	$noSpecialTags = array();	// No tags treated specially
	$keepAllTags = 1;	// Keep ALL tags.
	$contentDirection = 0;	// 0 = content between tags is left alone
	$xhtmlConfig = array('xhtml' => 1);

	return $this->HTMLcleaner($content, $noSpecialTags, $keepAllTags, $contentDirection, $xhtmlConfig);
}
01257
/**
 * Processes a single tag on its way out of HTMLcleaner().
 *
 * Only acts if $conf['xhtml'] is set: end tags are lower-cased; start tags (comments
 * excepted) are rebuilt with lower-case tag name, double-quoted attribute values, an
 * "alt" attribute on img, a "type" attribute on script, and " />" as ending for
 * img, br, hr, meta, link, base, area, input, param, col and tags that were already self-closed.
 *
 * @param	string		The tag, incl. "<" and ">"
 * @param	array		Configuration array; only the key "xhtml" is read
 * @param	boolean		Whether $value is an end tag
 * @param	boolean		If set, the tag is protected and returned untouched
 * @return	string		Processed tag
 */
function processTag($value,$conf,$endTag,$protected=0) {
	// Return immediately if protected or no parameters
	if ($protected || !count($conf)) return $value;

	// OK then, begin processing for XHTML output:
	// STILL VERY EXPERIMENTAL!!
	if (!empty($conf['xhtml'])) {
		if ($endTag) {	// Endtags are just set lowercase right away
			$value = strtolower($value);
		} elseif (substr($value,0,4)!='<!--') {	// ... and comments are ignored.
			$selfClosed = substr($value,-2)=='/>';
			$inValue = substr($value,1,($selfClosed?-2:-1));	// Finding inner value with out < >

			// Separate attributes and tagname; a tag without attributes yields one element only
			$nameAndAttribs = preg_split('/\s+/s',$inValue,2);
			$tagName = strtolower($nameAndAttribs[0]);
			$tagP = isset($nameAndAttribs[1]) ? $nameAndAttribs[1] : '';

			// Process attributes
			$tagAttrib = $this->get_tag_attributes($tagP);
			if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt'])) $tagAttrib[0]['alt']='';	// Set alt attribute for all images (not XHTML though...)
			if (!strcmp($tagName,'script') && !isset($tagAttrib[0]['type'])) $tagAttrib[0]['type']='text/javascript';	// Set type attribute for all script-tags

			// Set attributes: always in quotes, with htmlspecialchars converted.
			// (each() was removed in PHP 8, hence the plain foreach.)
			$outA=array();
			foreach ($tagAttrib[0] as $attrib_name => $attrib_value) {
				$outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
			}
			$newTag='<'.trim($tagName.' '.implode(' ',$outA));

			// All tags that are standalone (not wrapping content) should be ended with '/>'
			if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed) {
				$newTag.=' />';
			} else {
				$newTag.='>';
			}
			$value = $newTag;
		}
	}

	return $value;
}
01305
/**
 * Processes the content between tags on its way out of HTMLcleaner().
 *
 * @param	string		Content between tags
 * @param	integer		Direction for bidir_htmlspecialchars(); 0 leaves the content untouched
 * @param	mixed		Additional configuration (not used here)
 * @return	string		Processed content
 */
function processContent($value,$dir,$conf) {
	if ($dir==0) {
		return $value;
	}
	return $this->bidir_htmlspecialchars($value,$dir);
}
01319 }
01320
01321
01322
// Extension hook: if an XCLASS file is registered for this class file in
// $TYPO3_CONF_VARS for the current TYPO3_MODE, include it (once).
if (defined('TYPO3_MODE')) {
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']) {
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
	}
}
01326 ?>