00001 <?php
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00106 class t3lib_parsehtml {
00107 var $caseShift_cache=array();
00108
00109
00110
00111
00112
00113
00114
00115
/**
 * Returns the content found between the first two occurrences of $marker.
 *
 * If the markers are wrapped in HTML comments (e.g. "<!-- ###M### begin -->"),
 * the remainder of the opening comment and the start of the closing comment
 * are excluded from the returned subpart.
 *
 * @param string $content Content in which to look for the subpart
 * @param string $marker  Marker string, e.g. "###CONTENT_PART###"
 * @return string The subpart; empty string if the marker is empty, not found,
 *                or has no closing occurrence
 */
function getSubpart($content, $marker) {
    if (!$marker) {
        return '';
    }
    $start = strpos($content, $marker);
    if ($start === false) {
        return '';
    }
    $start += strlen($marker);

    // Search from directly behind the opening marker, so that an empty
    // subpart (two adjacent markers) is found as well.
    $stop = strpos($content, $marker, $start);
    if ($stop === false) {
        return '';
    }
    $sub = (string)substr($content, $start, $stop - $start);

    // Skip the tail of an HTML comment wrapping the opening marker.
    $reg = array();
    if (preg_match('/^[^<]*-->/', $sub, $reg)) {
        $start += strlen($reg[0]);
    }

    // Cut off the head of an HTML comment wrapping the closing marker.
    $reg = array();
    if (preg_match('/<!--[^>]*$/D', $sub, $reg)) {
        $stop -= strlen($reg[0]);
    }

    return (string)substr($content, $start, $stop - $start);
}
00140
/**
 * Substitutes the subpart enclosed by two occurrences of $marker.
 *
 * HTML comments wrapping the markers ("<!-- ###M### -->") are treated as part
 * of the markers and are replaced together with them.
 *
 * @param string  $content        Content holding the subpart
 * @param string  $marker         Marker string
 * @param mixed   $subpartContent Replacement string, or an array whose elements
 *                                0 and 1 are wrapped around the existing subpart
 * @param boolean $recursive      If set, later marker pairs in the remaining
 *                                content are substituted as well
 * @param boolean $keepMarker     If set, the markers (including their wrapping
 *                                comments) are kept around the new content
 * @return string Processed content; $content unchanged if no marker pair exists
 */
function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
    $markerLen = strlen($marker);
    $start = $markerLen ? strpos($content, $marker) : false;
    if ($start === false) {
        return $content;
    }
    $stop = strpos($content, $marker, $start + $markerLen);
    if ($stop === false) {
        return $content;
    }
    $stop += $markerLen;

    $before_marker = '';
    $after_marker = '';

    // Opening marker: if it sits inside an HTML comment, move $start back to
    // the beginning of that comment.
    $reg = array();
    $inOpeningComment = preg_match('/<!--[^>]*$/D', (string)substr($content, 0, $start), $reg);
    if ($inOpeningComment) {
        $start -= strlen($reg[0]);
    }
    if ($keepMarker) {
        $keepLen = $markerLen;
        $reg_k = array();
        if ($inOpeningComment && preg_match('/^[^>]*-->/', (string)substr($content, $start), $reg_k)) {
            $keepLen = strlen($reg_k[0]);
        }
        $before_marker = (string)substr($content, $start, $keepLen);
    }
    $before = (string)substr($content, 0, $start);

    // Closing marker: if it is followed by the end of an HTML comment, move
    // $stop behind that comment.
    $reg = array();
    $inClosingComment = preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg);
    if ($inClosingComment) {
        $stop += strlen($reg[0]);
    }
    if ($keepMarker) {
        $sLen = $markerLen;
        $reg_k = array();
        if ($inClosingComment && preg_match('/<!--[^<]*$/D', (string)substr($content, 0, $stop), $reg_k)) {
            $sLen = strlen($reg_k[0]);
        }
        $after_marker = (string)substr($content, $stop - $sLen, $sLen);
    }
    $after = (string)substr($content, $stop);

    if (is_array($subpartContent)) {
        $substContent = $subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
    } else {
        $substContent = $subpartContent;
    }

    // Handle further marker pairs, passing all options on unchanged.
    if ($recursive && strpos($after, $marker) !== false) {
        $after = $this->substituteSubpart($after,$marker,$subpartContent,$recursive,$keepMarker);
    }

    return $before.$before_marker.$substContent.$after_marker.$after;
}
00196
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
/**
 * Splits $content into an array where the odd-indexed elements are the
 * outermost blocks formed by the given tags (start tag, content, end tag) and
 * the even-indexed elements are the content in between.
 *
 * Nested occurrences of the listed tags stay inside their outermost block.
 * Tag names are matched case-insensitively and literally.
 *
 * @param string  $tag                   Comma list of tag names
 * @param string  $content               HTML content to split
 * @param boolean $eliminateExtraEndTags If set, end tags without a matching
 *                                       start tag are dropped
 * @return array Even indexes: outside content. Odd indexes: tag blocks.
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
    $tagNames = array();
    foreach (array_unique(t3lib_div::trimExplode(',',$tag,1)) as $tagName) {
        $tagNames[] = preg_quote($tagName, '/');
    }
    $regexStr = '/<\/?(?:'.implode('|',$tagNames).')(?:>|[[:space:]][^>]*>)/i';

    $parts = preg_split($regexStr, $content);

    $newParts = array();
    $pointer = strlen($parts[0]);
    $buffer = $parts[0];
    $nested = 0;
    $partCount = count($parts);
    // Element 0 is the content before the first tag; every following element
    // is the content after one matched tag, which starts at $pointer.
    for ($i = 1; $i < $partCount; $i++) {
        $v = $parts[$i];
        $isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
        $tagLen = strcspn(substr($content,$pointer),'>')+1;

        if (!$isEndTag) {
            if (!$nested) {
                // A new outermost block begins: flush the outside content.
                $newParts[] = $buffer;
                $buffer = '';
            }
            $nested++;
            $mbuffer = substr($content,$pointer,strlen($v)+$tagLen);
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        } else {
            $nested--;
            $eliminated = 0;
            if ($eliminateExtraEndTags && $nested<0) {
                $nested = 0;
                $eliminated = 1;
            } else {
                $buffer .= substr($content,$pointer,$tagLen);
            }
            $pointer += $tagLen;
            if (!$nested && !$eliminated) {
                // The outermost block is closed: flush it.
                $newParts[] = $buffer;
                $buffer = '';
            }
            $mbuffer = substr($content,$pointer,strlen($v));
            $pointer += strlen($mbuffer);
            $buffer .= $mbuffer;
        }
    }
    $newParts[] = $buffer;
    return $newParts;
}
00271
/**
 * Splits $content into tag blocks (see splitIntoBlock(), extra end tags
 * eliminated), recurses into the content of each block and lets callback
 * methods on $procObj process the pieces before they are joined again.
 *
 * @param string  $tag             Comma list of tag names
 * @param string  $content         HTML content
 * @param object  $procObj         Object holding the callback methods (by reference)
 * @param string  $callBackContent Method name called as ($content, $level) for
 *                                 content outside blocks; skipped if empty
 * @param string  $callBackTags    Method name called as ($tagsArray, $level) for
 *                                 each block; must return the array with keys
 *                                 tag_start, content, tag_end; skipped if empty
 * @param integer $level           Current nesting level
 * @return string Processed content
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0) {
    $output = '';
    $blocks = $this->splitIntoBlock($tag,$content,TRUE);
    foreach ($blocks as $index => $block) {
        if ($index%2 == 0) {
            // Content outside of the tag blocks.
            $output .= $callBackContent ? $procObj->$callBackContent($block,$level) : $block;
            continue;
        }

        // A tag block: take it apart, process its inner content recursively.
        $rawTagName = $this->getFirstTagName($block, TRUE);
        $tagInfo = array(
            'tag_start' => $this->getFirstTag($block),
            'tag_end'   => '</'.$rawTagName.'>',
            'tag_name'  => strtolower($rawTagName),
            'add_level' => 1
        );
        $tagInfo['content'] = $this->splitIntoBlockRecursiveProc(
            $tag,
            $this->removeFirstAndLastTag($block),
            $procObj,
            $callBackContent,
            $callBackTags,
            $level+$tagInfo['add_level']
        );

        if ($callBackTags) {
            $tagInfo = $procObj->$callBackTags($tagInfo,$level);
        }

        $output .= $tagInfo['tag_start'].$tagInfo['content'].$tagInfo['tag_end'];
    }

    return $output;
}
00309
/**
 * Splits $content on start tags (including self-closing ones) of the given
 * names.
 *
 * Tag names are matched case-insensitively and literally.
 *
 * @param string $tag     Comma list of tag names
 * @param string $content HTML content
 * @return array Even indexes: content between the tags. Odd indexes: the tags.
 */
function splitTags($tag,$content) {
    $tagNames = array();
    foreach (t3lib_div::trimExplode(',',$tag,1) as $tagName) {
        $tagNames[] = preg_quote($tagName, '/');
    }
    $regexStr = '/<(?:'.implode('|',$tagNames).')(?:>|\/>|[[:space:]][^>]*>)/i';
    $parts = preg_split($regexStr, $content);

    $pointer = strlen($parts[0]);
    $newParts = array();
    $newParts[] = $parts[0];
    $partCount = count($parts);
    for ($i = 1; $i < $partCount; $i++) {
        // The matched tag starts at $pointer and runs up to the next ">".
        $tagLen = strcspn(substr($content,$pointer),'>')+1;
        $foundTag = substr($content,$pointer,$tagLen);
        $newParts[] = $foundTag;
        $pointer += strlen($foundTag);

        // Content following the tag.
        $newParts[] = $parts[$i];
        $pointer += strlen($parts[$i]);
    }
    return $newParts;
}
00344
/**
 * Returns either the odd-indexed (tag) or the even-indexed (non-tag) elements
 * of an array such as the ones produced by splitIntoBlock() / splitTags().
 *
 * @param array   $parts       Parts array
 * @param boolean $tag_parts   If set, the odd-indexed elements are returned,
 *                             otherwise the even-indexed ones
 * @param boolean $include_tag If not set, each returned element is passed
 *                             through removeFirstAndLastTag()
 * @return array Selected parts, re-indexed from 0
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1) {
    $newParts = array();
    foreach ($parts as $k => $v) {
        if (($k+($tag_parts?0:1))%2) {
            if (!$include_tag) {
                $v = $this->removeFirstAndLastTag($v);
            }
            $newParts[] = $v;
        }
    }
    return $newParts;
}
00365
/**
 * Strips everything up to and including the first ">" and everything from the
 * last remaining "<" onwards, i.e. removes the surrounding tag pair of a block.
 *
 * @param string $str String starting with a tag and ending with a tag
 * @return string Inner content; empty string if no "<" is left after removing
 *                the first tag
 */
function removeFirstAndLastTag($str) {
    // Drop the first tag.
    $inner = (string)substr($str, strcspn($str,'>')+1);

    // Drop the last tag, which starts at the last "<".
    $lastTagStart = strrpos($inner, '<');
    if ($lastTagStart === false) {
        return '';
    }
    return (string)substr($inner, 0, $lastTagStart);
}
00384
/**
 * Returns the beginning of $str up to and including the first ">".
 *
 * @param string $str String expected to start with a tag
 * @return string The leading part ending with ">"; if $str holds no ">", the
 *                result of substr() over its whole length
 */
function getFirstTag($str) {
    return substr($str, 0, 1 + strcspn($str, '>'));
}
00398
/**
 * Returns the name of the first tag in $str.
 *
 * @param string  $str          String starting with a tag
 * @param boolean $preserveCase If not set, the name is converted to upper case
 * @return string Tag name
 */
function getFirstTagName($str,$preserveCase=FALSE) {
    // Content of the first tag without its "<" and ">".
    $tagInner = (string)substr(trim($this->getFirstTag($str)),1,-1);
    // The name is everything before the first whitespace character.
    $pieces = preg_split('/[[:space:]]/', $tagInner, 2);
    $tag = $pieces[0];
    if (!$preserveCase) {
        $tag = strtoupper($tag);
    }

    return trim($tag);
}
00413
/**
 * Parses the attributes of a tag into an array.
 *
 * @param string  $tag   Tag, or the attribute part of a tag
 * @param boolean $deHSC If set, values are passed through
 *                       t3lib_div::htmlspecialchars_decode()
 * @return array [0] attribute values keyed by lower-cased attribute name;
 *               [1] per-attribute meta data with the keys "origTag" (name as
 *               written) and "dashType" (quote character around the value)
 */
function get_tag_attributes($tag,$deHSC=0) {
    list($components,$metaC) = $this->split_tag_attributes($tag);
    $name = '';         // attribute name waiting for a value
    $valuemode = '';    // set once a "=" was seen, i.e. the next token is a value
    $attributes = array();
    $attributesMeta = array();
    if (is_array($components)) {
        foreach ($components as $key => $val) {
            if (!strcmp($val,'=')) {
                $valuemode = 'on';
                continue;
            }
            if ($valuemode) {
                if ($name) {
                    $attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
                    $attributesMeta[$name]['dashType'] = $metaC[$key];
                    $name = '';
                }
            } else {
                // Strip everything that cannot be part of an attribute name.
                $namekey = preg_replace('/[^a-zA-Z0-9_:-]/','',$val);
                if ($namekey) {
                    $name = strtolower($namekey);
                    $attributesMeta[$name] = array();
                    $attributesMeta[$name]['origTag'] = $namekey;
                    $attributes[$name] = '';
                }
            }
            $valuemode = '';
        }
    }
    return array($attributes,$attributesMeta);
}
00454
/**
 * Splits the attribute part of a tag into a flat list of tokens: attribute
 * names, "=" signs and values.
 *
 * @param string $tag Tag (a leading "<name" and a trailing ">" are removed) or
 *                    just the attribute part of a tag
 * @return array [0] list of tokens; [1] parallel list holding the quote
 *               character of each token, or '' if the token was unquoted
 */
function split_tag_attributes($tag) {
    // Remove the tag name ...
    $tag_tmp = trim(preg_replace('/^<[^[:space:]]*/','',trim($tag)));
    // ... and the closing ">".
    $tag_tmp = trim(preg_replace('/>$/D','',$tag_tmp));

    $metaValue = array();
    $value = array();
    while (strcmp($tag_tmp,'')) {
        $firstChar = substr($tag_tmp,0,1);
        if ($firstChar === '"' || $firstChar === "'") {
            // Quoted value: read up to the matching quote; an unterminated
            // quote consumes the rest of the string.
            $reg = explode($firstChar,$tag_tmp,3);
            $value[] = $reg[1];
            $metaValue[] = $firstChar;
            $tag_tmp = isset($reg[2]) ? trim($reg[2]) : '';
        } elseif ($firstChar === '=') {
            $value[] = '=';
            $metaValue[] = '';
            $tag_tmp = trim(substr($tag_tmp,1));
        } else {
            // Unquoted token: ends at the first whitespace or "=". The
            // delimiter itself is put back so that a "=" is not lost.
            $reg = preg_split('/[[:space:]=]/',$tag_tmp,2);
            $value[] = trim($reg[0]);
            $metaValue[] = '';
            $rest = isset($reg[1]) ? $reg[1] : '';
            $tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).$rest);
        }
    }
    return array($value,$metaValue);
}
00493
/**
 * Counts start and end tags in $content and reports mismatches.
 *
 * The content is lower-cased before counting. A tag counts when "<name" or
 * "</name" is followed by a non-alphanumeric character.
 *
 * @param string $content   HTML content to analyse
 * @param string $blockTags Comma list of tags expected to have an end tag
 * @param string $soloTags  Comma list of tags expected to have no end tag
 * @return array Keys: "counts" (start-tag count per tag found), "errors" (more
 *               start than end tags), "warnings" (more end than start tags, or
 *               end tags of solo tags), "blocks" (start, end, difference per
 *               block tag), "solo" (start, end per solo tag)
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area') {
    $content = strtolower($content);
    $analyzedOutput = array(
        'counts' => array(),
        'errors' => array(),
        'warnings' => array(),
        'blocks' => array(),
        'solo' => array()
    );
    $matches = array();

    // Block tags: start and end tags must be balanced.
    foreach (explode(',',$blockTags) as $tagName) {
        $quotedName = preg_quote($tagName, '/');
        $countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
        $countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
        $analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
        if ($countBegin) {
            $analyzedOutput['counts'][$tagName] = $countBegin;
        }
        if ($countBegin > $countEnd) {
            $analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
        } elseif ($countBegin < $countEnd) {
            $analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
        }
    }

    // Solo tags: no end tags expected at all.
    foreach (explode(',',$soloTags) as $tagName) {
        $quotedName = preg_quote($tagName, '/');
        $countBegin = (int)preg_match_all('/<'.$quotedName.'[^[:alnum:]]/',$content,$matches);
        $countEnd = (int)preg_match_all('/<\/'.$quotedName.'[^[:alnum:]]/',$content,$matches);
        $analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
        if ($countBegin) {
            $analyzedOutput['counts'][$tagName] = $countBegin;
        }
        if ($countEnd) {
            $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
        }
    }

    return $analyzedOutput;
}
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558
00559
00560
00561
00562
00563
/**
 * Cleans HTML content: keeps, rewrites, protects or removes tags according to
 * a per-tag configuration.
 *
 * @param string $content   HTML content
 * @param array  $tags      Configuration keyed by lower-case tag name. A
 *                          non-array value keeps the tag as it is. An array
 *                          may hold the keys overrideAttribs, allowedAttribs,
 *                          fixAttrib, protect, remap, rmTagIfNoAttrib and
 *                          nesting (any true value, or "global").
 * @param mixed  $keepAll   If true, tags not found in $tags are kept; the
 *                          string "protect" keeps them converted to
 *                          &lt;...&gt;. If false they are removed.
 * @param integer $hSC      Direction passed to processContent() for all
 *                          non-tag content
 * @param array  $addConfig Configuration passed on to processTag() and
 *                          processContent()
 * @return string Cleaned content
 */
function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array()) {
    // All options a fixAttrib configuration may carry, so they can be read
    // without triggering undefined-index warnings.
    $fixDefaults = array(
        'set' => null, 'unset' => null, 'default' => null, 'always' => null,
        'trim' => null, 'intval' => null, 'lower' => null, 'upper' => null,
        'range' => null, 'list' => null, 'casesensitiveComp' => null,
        'removeIfFalse' => null, 'removeIfEquals' => null,
        'prefixLocalAnchors' => null, 'prefixRelPathWith' => null, 'userFunc' => null
    );

    $newContent = array();
    $tokArr = explode('<',$content);
    // Everything before the first "<" is plain content.
    $newContent[] = $this->processContent(array_shift($tokArr),$hSC,$addConfig);

    $c = 1;                     // next index in $newContent
    $tagRegister = array();     // per tag name: $newContent indexes of open start tags
    $tagStack = array();        // names of open tags with "global" nesting
    foreach ($tokArr as $tok) {
        $firstChar = (string)substr($tok,0,1);
        $tagEnd = strcspn($tok,'>');

        // Not a tag (does not start with a letter, digit or "/", or is never
        // closed by ">"): hand the "<" back and treat it as content.
        if (!preg_match('/[[:alnum:]\/]/',$firstChar) || strlen($tok)==$tagEnd) {
            $newContent[$c++] = $this->processContent('<'.$tok,$hSC,$addConfig);
            continue;
        }

        $endTag = $firstChar=='/' ? 1 : 0;
        $tagContent = (string)substr($tok,$endTag,$tagEnd-$endTag);
        $tagParts = preg_split('/[[:space:]]/',$tagContent,2);
        if (!isset($tagParts[1])) {
            $tagParts[1] = '';
        }
        $tagName = strtolower($tagParts[0]);

        if (isset($tags[$tagName])) {
            if (is_array($tags[$tagName])) {
                $tagConf = $tags[$tagName];

                if (!$endTag) {
                    // Replace all attributes by a fixed string.
                    if (isset($tagConf['overrideAttribs']) && strcmp($tagConf['overrideAttribs'],'')) {
                        $tagParts[1] = $tagConf['overrideAttribs'];
                    }

                    // Reduce the attributes to the allowed ones ("0" = none).
                    $allowedAttribs = isset($tagConf['allowedAttribs']) ? (string)$tagConf['allowedAttribs'] : '';
                    if ($allowedAttribs !== '') {
                        if ($allowedAttribs === '0') {
                            $tagParts[1] = '';
                        } elseif (trim($tagParts[1])) {
                            $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                            $newTagAttrib = array();
                            foreach (t3lib_div::trimExplode(',',strtolower($allowedAttribs),1) as $allowTag) {
                                if (isset($tagAttrib[0][$allowTag])) {
                                    $newTagAttrib[$allowTag] = $tagAttrib[0][$allowTag];
                                }
                            }
                            $tagParts[1] = $this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
                        }
                    }

                    // Apply the per-attribute fixes.
                    if (isset($tagConf['fixAttrib']) && is_array($tagConf['fixAttrib'])) {
                        $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                        $tagParts[1] = '';
                        foreach ($tagConf['fixAttrib'] as $attr => $params) {
                            $params = is_array($params) ? array_merge($fixDefaults,$params) : $fixDefaults;

                            if (strlen((string)$params['set'])) $tagAttrib[0][$attr] = $params['set'];
                            if (strlen((string)$params['unset'])) unset($tagAttrib[0][$attr]);
                            if (strcmp((string)$params['default'],'') && !isset($tagAttrib[0][$attr])) $tagAttrib[0][$attr] = $params['default'];
                            if ($params['always'] || isset($tagAttrib[0][$attr])) {
                                if ($params['trim']) {$tagAttrib[0][$attr] = trim($tagAttrib[0][$attr]);}
                                if ($params['intval']) {$tagAttrib[0][$attr] = intval($tagAttrib[0][$attr]);}
                                if ($params['lower']) {$tagAttrib[0][$attr] = strtolower($tagAttrib[0][$attr]);}
                                if ($params['upper']) {$tagAttrib[0][$attr] = strtoupper($tagAttrib[0][$attr]);}
                                if ($params['range']) {
                                    if (isset($params['range'][1])) {
                                        $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
                                    } else {
                                        $tagAttrib[0][$attr] = t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
                                    }
                                }
                                if (is_array($params['list'])) {
                                    // The cache key includes the attribute name, so lists
                                    // of different attributes of one tag do not collide.
                                    if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName.'.'.$attr))) {
                                        $tagAttrib[0][$attr] = $params['list'][0];
                                    }
                                }
                                if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],''))) {
                                    unset($tagAttrib[0][$attr]);
                                }
                                if (strcmp((string)$params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp']))) {
                                    unset($tagAttrib[0][$attr]);
                                }
                                if ($params['prefixLocalAnchors']) {
                                    if (substr($tagAttrib[0][$attr],0,1)=='#') {
                                        $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
                                        $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
                                        if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL'))) {
                                            $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
                                        }
                                    }
                                }
                                if ($params['prefixRelPathWith']) {
                                    $urlParts = parse_url($tagAttrib[0][$attr]);
                                    $urlScheme = (is_array($urlParts) && isset($urlParts['scheme'])) ? $urlParts['scheme'] : '';
                                    $urlPath = (is_array($urlParts) && isset($urlParts['path'])) ? $urlParts['path'] : '';
                                    if (!$urlScheme && substr($urlPath,0,1)!='/') {
                                        $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
                                    }
                                }
                                if ($params['userFunc']) {
                                    $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
                                }
                            }
                        }
                        $tagParts[1] = $this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
                    }
                } else {
                    // End tags never carry attributes.
                    $tagParts[1] = '';
                }

                // Protected tags are written with entities instead of < and >.
                if (!empty($tagConf['protect'])) {
                    $lt = '&lt;'; $gt = '&gt;';
                } else {
                    $lt = '<'; $gt = '>';
                }

                // Rename the tag.
                if (!empty($tagConf['remap'])) {
                    $tagParts[0] = $tagConf['remap'];
                }

                // Output the tag, unless it is a start tag without attributes
                // and rmTagIfNoAttrib is set.
                if ($endTag || trim($tagParts[1]) || empty($tagConf['rmTagIfNoAttrib'])) {
                    $setTag = 1;

                    if (!empty($tagConf['nesting'])) {
                        $globalNesting = ($tagConf['nesting']=='global');
                        if (!isset($tagRegister[$tagName]) || !is_array($tagRegister[$tagName])) {
                            $tagRegister[$tagName] = array();
                        }

                        if ($endTag) {
                            $correctTag = 1;
                            if ($globalNesting) {
                                $lastEl = (string)end($tagStack);
                                if (strcmp($tagName,$lastEl)) {
                                    if (in_array($tagName,$tagStack)) {
                                        // Tags opened after the matching start tag were
                                        // never closed: remove their start tags.
                                        while (count($tagStack) && strcmp($tagName,$lastEl)) {
                                            $elPos = end($tagRegister[$lastEl]);
                                            unset($newContent[$elPos]);

                                            array_pop($tagRegister[$lastEl]);
                                            array_pop($tagStack);
                                            $lastEl = (string)end($tagStack);
                                        }
                                    } else {
                                        // End tag without any open start tag.
                                        $correctTag = 0;
                                    }
                                }
                            }
                            if (!count($tagRegister[$tagName]) || !$correctTag) {
                                $setTag = 0;
                            } else {
                                array_pop($tagRegister[$tagName]);
                                if ($globalNesting) {array_pop($tagStack);}
                            }
                        } else {
                            array_push($tagRegister[$tagName],$c);
                            if ($globalNesting) {array_push($tagStack,$tagName);}
                        }
                    }

                    if ($setTag) {
                        $newContent[$c++] = $this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
                    }
                }
            } else {
                // Tag is allowed without further configuration.
                $newContent[$c++] = $this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
            }
        } elseif ($keepAll) {
            // Tag is not configured, but all tags are to be kept.
            if (!strcmp($keepAll,'protect')) {
                $lt = '&lt;'; $gt = '&gt;';
            } else {
                $lt = '<'; $gt = '>';
            }
            $newContent[$c++] = $this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
        }
        // Content following the tag.
        $newContent[$c++] = $this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
    }

    // Start tags still registered here were never closed: remove them.
    foreach ($tagRegister as $positions) {
        foreach ($positions as $pKey) {
            unset($newContent[$pKey]);
        }
    }

    return implode('',$newContent);
}
00788
/**
 * Converts special HTML characters to entities or back.
 *
 * @param string  $value Input value
 * @param integer $dir   1: htmlspecialchars(); 2: htmlspecialchars() followed
 *                       by t3lib_div::deHSCentities(); -1: the reverse, the
 *                       entities &gt; &lt; &quot; &amp; are converted back to
 *                       characters; any other value leaves $value unchanged
 * @return string Converted value
 */
function bidir_htmlspecialchars($value,$dir) {
    if ($dir==1) {
        $value = htmlspecialchars($value);
    } elseif ($dir==2) {
        $value = t3lib_div::deHSCentities(htmlspecialchars($value));
    } elseif ($dir==-1) {
        // "&amp;" is decoded last, so that "&amp;lt;" becomes "&lt;" and not "<".
        $value = str_replace('&gt;','>',$value);
        $value = str_replace('&lt;','<',$value);
        $value = str_replace('&quot;','"',$value);
        $value = str_replace('&amp;','&',$value);
    }
    return $value;
}
00809
/**
 * Prefixes relative resource references in HTML content.
 *
 * Handled are: background (td, body, table), src (img, input, script, embed),
 * href (link, a), action (form) and every url(...) inside <style> blocks.
 *
 * @param string $main_prefix  Prefix for relative paths
 * @param string $content      HTML content
 * @param array  $alternatives Alternative prefixes, keyed by upper-case tag
 *                             name; the key "style" (lower case) sets the
 *                             prefix for <style> blocks
 * @param string $suffix       String appended to every prefixed path
 * @return string Processed content
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='') {
    // Attribute holding the resource reference, per tag.
    $referenceAttribute = array(
        'td' => 'background', 'body' => 'background', 'table' => 'background',
        'img' => 'src', 'input' => 'src', 'script' => 'src', 'embed' => 'src',
        'link' => 'href', 'a' => 'href',
        'form' => 'action'
    );

    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
    foreach ($parts as $k => $v) {
        if (!($k%2)) {
            continue;   // content between the tags
        }
        $firstTagName = $this->getFirstTagName($v);
        $tagKey = strtolower($firstTagName);
        if (!isset($referenceAttribute[$tagKey])) {
            continue;
        }
        $attribute = $referenceAttribute[$tagKey];
        $params = $this->get_tag_attributes($v,1);
        if (empty($params[0][$attribute])) {
            continue;
        }

        $altKey = strtoupper($firstTagName);
        $prefix = isset($alternatives[$altKey]) ? $alternatives[$altKey] : $main_prefix;
        $params[0][$attribute] = $this->prefixRelPath($prefix,$params[0][$attribute],$suffix);

        // Detect the tag ending, so that it is re-applied correctly.
        $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';
        $parts[$k] = '<'.trim($tagKey.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
    }
    $content = implode('',$parts);

    // Fix <style> sections:
    $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
    if (strlen($prefix)) {
        // Escape "\" and "$", which have a special meaning in the replacement.
        $search = array('\\','$');
        $replace = array('\\\\','\\$');
        $replacement = '${1}'.str_replace($search,$replace,$prefix).'${2}'.str_replace($search,$replace,$suffix).'${3}';

        $parts = $this->splitIntoBlock('style',$content);
        foreach ($parts as $k => $v) {
            if ($k%2) {
                $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$v);
            }
        }
        $content = implode('',$parts);
    }

    return $content;
}
00892
/**
 * Prefixes $srcVal if it is a relative path, i.e. it has no URL scheme and
 * does not start with "/".
 *
 * @param string $prefix Prefix string
 * @param string $srcVal Path or URL
 * @param string $suffix String appended together with the prefix
 * @return string $srcVal, wrapped in prefix and suffix if it is relative
 */
function prefixRelPath($prefix,$srcVal,$suffix='') {
    // parse_url() returns false for seriously malformed input and leaves out
    // "scheme" if there is none.
    $pU = parse_url($srcVal);
    $hasScheme = is_array($pU) && !empty($pU['scheme']);
    if (!$hasScheme && substr($srcVal, 0, 1)!='/') {
        $srcVal = $prefix.$srcVal.$suffix;
    }
    return $srcVal;
}
00909
/**
 * Cleans up <font> tags: only the selected attributes are kept, and font tags
 * left without attributes are removed (their content stays).
 *
 * @param string  $value     HTML content
 * @param boolean $keepFace  If set, the "face" attribute is kept
 * @param boolean $keepSize  If set, the "size" attribute is kept
 * @param boolean $keepColor If set, the "color" attribute is kept
 * @return string Processed content
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) {
    $keepAttribute = array('face' => $keepFace, 'size' => $keepSize, 'color' => $keepColor);

    $fontSplit = $this->splitIntoBlock('font',$value);
    foreach ($fontSplit as $k => $v) {
        if (!($k%2)) {
            continue;   // content outside of font tags
        }
        $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
        $newAttribs = array();
        foreach ($keepAttribute as $attribName => $keep) {
            if ($keep && !empty($attribArray[$attribName])) {
                $newAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
            }
        }

        // Nested font tags are cleaned the same way.
        $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
        if (count($newAttribs)) {
            $fontSplit[$k] = '<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>';
        } else {
            $fontSplit[$k] = $innerContent;
        }
    }
    return implode('',$fontSplit);
}
00941
/**
 * Renames tags (start tags with and without attributes, and end tags).
 *
 * Tag names are matched case-insensitively; $ltChar and the tag names are
 * matched literally.
 *
 * @param string $value   HTML content
 * @param array  $tags    Map: current tag name => new tag name
 * @param string $ltChar  Character(s) opening a tag in $value
 * @param string $ltChar2 Character(s) opening a tag in the result
 * @return string Processed content
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
    // Escape "\" and "$", which have a special meaning in the replacement.
    $search = array('\\','$');
    $replace = array('\\\\','\\$');

    $open = preg_quote($ltChar,'/');
    $newOpen = str_replace($search,$replace,$ltChar2);
    foreach ($tags as $from => $to) {
        $fromName = preg_quote((string)$from,'/');
        $toName = str_replace($search,$replace,(string)$to);

        $value = preg_replace('/'.$open.$fromName.'>/i',$newOpen.$toName.'>',$value);
        $value = preg_replace('/'.$open.$fromName.'[[:space:]]([^>]*)>/i',$newOpen.$toName.' ${1}>',$value);
        $value = preg_replace('/'.$open.'\/'.$fromName.'[^>]*>/i',$newOpen.'/'.$toName.'>',$value);
    }
    return $value;
}
00960
/**
 * Converts protected tags (written as &lt;tag&gt;) back into real tags.
 *
 * @param string $content Content holding protected tags
 * @param string $tagList Comma list of lower-case tag names to unprotect; if
 *                        empty, all protected tags are unprotected
 * @return string Processed content
 */
function unprotectTags($content,$tagList='') {
    $unprotectAll = !strcmp($tagList,'');
    $tagsArray = $unprotectAll ? array() : t3lib_div::trimExplode(',',$tagList,1);

    $contentParts = explode('&lt;',$content);
    $partCount = count($contentParts);
    // Element 0 is the content before the first "&lt;" and stays untouched.
    for ($k = 1; $k < $partCount; $k++) {
        $tok = $contentParts[$k];
        $isTag = false;

        $firstChar = substr($tok,0,1);
        if (strcmp(trim($firstChar),'')) {
            $subparts = explode('&gt;',$tok,2);
            $tagEnd = strlen($subparts[0]);
            if (strlen($tok)!=$tagEnd) {
                // A closing "&gt;" was found.
                $endTag = $firstChar=='/' ? 1 : 0;
                $tagContent = (string)substr($tok,$endTag,$tagEnd-$endTag);
                $tagParts = preg_split('/[[:space:]]/',$tagContent,2);
                $tagName = strtolower($tagParts[0]);
                $isTag = $unprotectAll || in_array($tagName,$tagsArray);
            }
        }

        if ($isTag) {
            $contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
        } else {
            // Not a tag to unprotect: restore the entity removed by explode().
            $contentParts[$k] = '&lt;'.$tok;
        }
    }

    return implode('',$contentParts);
}
00991
/**
 * Strips all tags from $value, except the listed ones.
 *
 * The tags to keep are temporarily rewritten to "_<md5 of the name>...>", so
 * that strip_tags() does not see them, and restored afterwards.
 *
 * @param string $value   HTML content
 * @param string $tagList Comma list of tag names to keep
 * @return string Processed content
 */
function stripTagsExcept($value,$tagList) {
    $forthArr = array();
    $backArr = array();
    foreach (t3lib_div::trimExplode(',',$tagList,1) as $theTag) {
        $hash = md5($theTag);
        $forthArr[$theTag] = $hash;
        $backArr[$hash] = $theTag;
    }
    $value = $this->mapTags($value,$forthArr,'<','_');
    $value = strip_tags($value);
    $value = $this->mapTags($value,$backArr,'_','<');
    return $value;
}
01014
/**
 * Prepares a string, or all values of an array, for a comparison that is
 * either case-sensitive or not.
 *
 * @param mixed   $str      String or array of strings
 * @param boolean $flag     If set, the comparison is case-sensitive and the
 *                          input is returned unchanged; otherwise it is
 *                          converted to upper case
 * @param string  $cacheKey Arrays only: if given, the converted array is
 *                          stored in / read from $this->caseShift_cache under
 *                          this key
 * @return mixed The input, converted to upper case unless $flag is set
 */
function caseShift($str,$flag,$cacheKey='') {
    if ($flag) {
        // Case-sensitive comparison: nothing to convert, nothing to cache.
        return $str;
    }
    if (!is_array($str)) {
        return strtoupper($str);
    }

    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }
    foreach ($str as $k => $v) {
        $str[$k] = strtoupper($v);
    }
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $str;
    }
    return $str;
}
01038
/**
 * Compiles an attribute array into the attribute string of a tag.
 *
 * An attribute gets a value part if its value is not empty or if a "dashType"
 * is recorded for it in $meta.
 *
 * @param array   $tagAttrib  Attribute values keyed by attribute name
 * @param array   $meta       Per-attribute meta data as returned by
 *                            get_tag_attributes(): "origTag" (name as
 *                            written), "dashType" (quote character)
 * @param boolean $xhtmlClean If set, names are lower-cased and values are
 *                            passed through htmlspecialchars() and put in
 *                            double quotes. Otherwise the original name and
 *                            quote character are used; without a recorded
 *                            quote character, values accepted by
 *                            t3lib_div::testInt() stay unquoted and all others
 *                            get double quotes.
 * @return string Attributes, separated by single spaces
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) {
    $accu = array();
    foreach ($tagAttrib as $k => $v) {
        $hasDashType = isset($meta[$k]['dashType']);
        $hasValuePart = strcmp((string)$v,'') || $hasDashType;

        if ($xhtmlClean) {
            $attr = strtolower($k);
            if ($hasValuePart) {
                $attr .= '="'.htmlspecialchars($v).'"';
            }
        } else {
            $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
            if ($hasValuePart) {
                if ($hasDashType && $meta[$k]['dashType']) {
                    $dash = $meta[$k]['dashType'];
                } else {
                    $dash = t3lib_div::testInt($v) ? '' : '"';
                }
                $attr .= '='.$dash.$v.$dash;
            }
        }
        $accu[] = $attr;
    }
    return implode(' ',$accu);
}
01068
/**
 * Returns only the attribute values of a tag, without the meta data that
 * get_tag_attributes() delivers in addition.
 *
 * @param string  $tag   Tag, or the attribute part of a tag
 * @param boolean $deHSC Passed on to get_tag_attributes()
 * @return array Attribute values keyed by attribute name; empty array if none
 */
function get_tag_attributes_classic($tag,$deHSC=0) {
    list($attributes) = $this->get_tag_attributes($tag,$deHSC);
    if (!is_array($attributes)) {
        return array();
    }
    return $attributes;
}
01081
/**
 * Indents every line of $content. Carriage returns are removed, lines are
 * separated by line feeds.
 *
 * @param string  $content    Content to indent
 * @param integer $number     Number of times $indentChar is put in front of
 *                            each line; values below 1 add no indentation
 * @param string  $indentChar Indentation string
 * @return string Indented content
 */
function indentLines($content, $number=1, $indentChar="\t") {
    $preTab = str_repeat($indentChar, max(0, intval($number)));
    $lines = explode(chr(10),str_replace(chr(13),'',$content));
    foreach ($lines as $k => $v) {
        $lines[$k] = $preTab.$v;
    }
    return implode(chr(10), $lines);
}
01098
/**
 * Converts a TypoScript-style configuration array into the arguments for
 * HTMLcleaner().
 *
 * Keys read from $TSconfig: allowTags, tags. (per-tag settings, including
 * fixAttrib.), localNesting, globalNesting, rmTagIfNoAttrib, noAttrib,
 * removeTags, xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars.
 *
 * @param array $TSconfig Configuration array
 * @param array $keepTags Tag configuration to start from; merged on top of
 *                        the tags listed in "allowTags"
 * @return array [0] tag configuration, [1] keepNonMatchedTags as string,
 *               [2] htmlSpecialChars as integer, [3] additional configuration
 */
function HTMLparserConfig($TSconfig,$keepTags=array()) {
    // Allow tags (base list, merged with incoming array)
    $allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
    $alTags = array_flip(t3lib_div::trimExplode(',',strtolower($allowTags),1));
    $keepTags = array_merge($alTags,$keepTags);

    // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
        // First the plain switches: "tag = 0" removes, "tag = 1" adds a tag.
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) && $key==strtolower($key)) {
                if (!strcmp($tagC,'0')) unset($keepTags[$key]);
                if (!strcmp($tagC,'1') && !isset($keepTags[$key])) $keepTags[$key] = 1;
            }
        }

        // Then the detailed configuration ("tag." keys, lower case only).
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) || $key!=strtolower($key)) {
                continue;
            }
            $key = substr($key,0,-1);    // strip the trailing dot
            if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) {
                $keepTags[$key] = array();
            }
            if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                    if (!is_array($atConfig)) {
                        continue;
                    }
                    $atName = substr($atName,0,-1);
                    if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName])) {
                        $keepTags[$key]['fixAttrib'][$atName] = array();
                    }
                    $merged = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);
                    // "range" and "list" are given as comma lists and used as
                    // arrays; values that are arrays already are left alone.
                    foreach (array('range','list') as $listOption) {
                        if (isset($merged[$listOption]) && !is_array($merged[$listOption]) && strcmp($merged[$listOption],'')) {
                            $merged[$listOption] = t3lib_div::trimExplode(',',$merged[$listOption]);
                        }
                    }
                    $keepTags[$key]['fixAttrib'][$atName] = $merged;
                }
            }
            unset($tagC['fixAttrib.']);
            unset($tagC['fixAttrib']);
            $keepTags[$key] = array_merge($keepTags[$key],$tagC);
        }
    }

    // Options holding a comma list of tags; each sets one property on those
    // listed tags that are allowed. "globalNesting" is applied after
    // "localNesting" and therefore wins for tags listed in both.
    $listOptions = array(
        'localNesting' => array('nesting',1),
        'globalNesting' => array('nesting','global'),
        'rmTagIfNoAttrib' => array('rmTagIfNoAttrib',1),
        'noAttrib' => array('allowedAttribs',0)
    );
    foreach ($listOptions as $option => $property) {
        if (empty($TSconfig[$option])) {
            continue;
        }
        foreach (t3lib_div::trimExplode(',',strtolower($TSconfig[$option]),1) as $tn) {
            if (isset($keepTags[$tn])) {
                if (!is_array($keepTags[$tn])) $keepTags[$tn] = array();
                $keepTags[$tn][$property[0]] = $property[1];
            }
        }
    }

    // removeTags: configured so that HTMLcleaner() strips all attributes and
    // then drops the attribute-less start tag.
    if (!empty($TSconfig['removeTags'])) {
        foreach (t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1) as $tn) {
            $keepTags[$tn] = array();
            $keepTags[$tn]['allowedAttribs'] = 0;
            $keepTags[$tn]['rmTagIfNoAttrib'] = 1;
        }
    }

    // Create additional configuration:
    $addConfig = array();
    if (!empty($TSconfig['xhtml_cleaning'])) {
        $addConfig['xhtml'] = 1;
    }

    return array(
        $keepTags,
        ''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
        intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
        $addConfig
    );
}
01205
/**
 * Runs $content through HTMLcleaner() with the "xhtml" option set, keeping
 * all tags and leaving the content between tags unconverted.
 *
 * @param string $content HTML content
 * @return string Result of HTMLcleaner()
 */
function XHTML_clean($content) {
    $tagConfig = array();               // no tags treated specially
    $keepAllTags = 1;
    $hscDirection = 0;                  // converting content would break e.g. <script> content
    $xhtmlConfig = array('xhtml' => 1);

    return $this->HTMLcleaner($content,$tagConfig,$keepAllTags,$hscDirection,$xhtmlConfig);
}
01241
/**
 * Post-processes a single tag; used by HTMLcleaner().
 *
 * With $conf['xhtml'] set: end tags are lower-cased; start tags get a
 * lower-case name, double-quoted attribute values, an "alt" attribute for img,
 * a "type" attribute for script, and " />" as ending for a fixed list of
 * empty elements or when the tag was self-closing already. Tags starting with
 * "<!" are left untouched.
 *
 * @param string  $value     The tag
 * @param array   $conf      Configuration; only the key "xhtml" is read
 * @param boolean $endTag    Set if $value is an end tag
 * @param boolean $protected If set, the tag is returned unchanged
 * @return string Processed tag
 */
function processTag($value,$conf,$endTag,$protected=0) {
    // Return immediately if protected or no parameters
    if ($protected || !count($conf)) {
        return $value;
    }
    if (empty($conf['xhtml'])) {
        return $value;
    }
    if ($endTag) {
        return strtolower($value);
    }
    if (substr($value,0,2)=='<!') {
        return $value;
    }

    $selfClosing = substr($value,-2)=='/>';
    // Inner value without "<" and ">" (or "/>")
    $inValue = (string)substr($value,1,$selfClosing?-2:-1);
    // Separate tag name and attributes
    $pieces = preg_split('/[[:space:]]/',$inValue,2);
    $tagName = strtolower($pieces[0]);
    $tagP = isset($pieces[1]) ? $pieces[1] : '';

    // Process attributes
    $tagAttrib = $this->get_tag_attributes($tagP);
    if (!strcmp($tagName,'img') && !isset($tagAttrib[0]['alt'])) {
        $tagAttrib[0]['alt'] = '';
    }
    if (!strcmp($tagName,'script') && !isset($tagAttrib[0]['type'])) {
        $tagAttrib[0]['type'] = 'text/javascript';
    }
    $outA = array();
    foreach ($tagAttrib[0] as $attrib_name => $attrib_value) {
        // Always in quotes, with special characters converted.
        $outA[] = $attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
    }
    $newTag = '<'.trim($tagName.' '.implode(' ',$outA));

    if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosing) {
        $newTag .= ' />';
    } else {
        $newTag .= '>';
    }

    return $newTag;
}
01289
/**
 * Post-processes content found between tags; used by HTMLcleaner().
 *
 * @param string  $value Content
 * @param integer $dir   Direction passed to bidir_htmlspecialchars(); with 0
 *                       the content is returned unchanged
 * @param array   $conf  Configuration (not used by this implementation)
 * @return string Processed content
 */
function processContent($value,$dir,$conf) {
    return $dir!=0 ? $this->bidir_htmlspecialchars($value,$dir) : $value;
}
01303 }
01304
01305
01306
// Include the extension class (XCLASS) registered for this file, if any.
// !empty() keeps the check silent when no XCLASS is registered.
if (defined('TYPO3_MODE') && !empty($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])) {
    include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
}
01310 ?>