00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 1999-2006 Kasper Skaarhoj (
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
00106 class t3lib_parsehtml   {
00107         var $caseShift_cache=array();
00110         // *******************************************'
00111         // COPY FROM class.tslib_content.php: / BEGIN
00112         // substituteSubpart
00113         // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
00114         // *******************************************'
00123         function getSubpart($content, $marker)  {
00124                 $start = strpos($content, $marker);
00125                 if ($start===false)     { return ''; }
00126                 $start += strlen($marker);
00127                 $stop = strpos($content, $marker, $start);
00128                         // Q: What shall get returned if no stop marker is given /*everything till the end*/ or nothing
00129                 if ($stop===false)      { return /*substr($content, $start)*/ ''; }
00130                 $content = substr($content, $start, $stop-$start);
00131                 $matches = array();
00132                 if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1)      {
00133                         return $matches[2];
00134                 }
00135                 $matches = array();
00136                 if (preg_match('/(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1)     {
00137                         return $matches[1];
00138                 }
00139                 $matches = array();
00140                 if (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $content, $matches)===1)      {
00141                         return $matches[2];
00142                 }
00143                 return $content;
00144         }
00156         function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0) {
00157                 $start = strpos($content, $marker);
00158                 if ($start===false)     { return $content; }
00159                 $startAM = $start+strlen($marker);
00160                 $stop = strpos($content, $marker, $startAM);
00161                 if ($stop===false)      { return $content; }
00162                 $stopAM = $stop+strlen($marker);
00163                 $before = substr($content, 0, $start);
00164                 $after = substr($content, $stopAM);
00165                 $between = substr($content, $startAM, $stop-$startAM);
00167                 if ($recursive) {
00168                         $after = t3lib_parsehtml::substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker);
00169                 }
00171                 if ($keepMarker)        {
00172                         $matches = array();
00173                         if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)      {
00174                                 $before .= $marker.$matches[1];
00175                                 $between = $matches[2];
00176                                 $after = $matches[3].$marker.$after;
00177                         } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)      {
00178                                 $before .= $marker;
00179                                 $between = $matches[1];
00180                                 $after = $matches[2].$marker.$after;
00181                         } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1)        {
00182                                 $before .= $marker.$matches[1];
00183                                 $between = $matches[2];
00184                                 $after = $marker.$after;
00185                         } else  {
00186                                 $before .= $marker;
00187                                 $after = $marker.$after;
00188                         }
00189                 } else  {
00190                         $matches = array();
00191                         if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $before, $matches)===1)       {
00192                                 $before = $matches[1];
00193                         }
00194                         if (is_array($subpartContent))  {
00195                                 $matches = array();
00196                                 if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)      {
00197                                         $between = $matches[2];
00198                                 } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)      {
00199                                         $between = $matches[1];
00200                                 } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1)        {
00201                                         $between = $matches[2];
00202                                 }
00203                         }
00204                         $matches = array();
00205                         if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $after, $matches)===1)  {
00206                                 $after = $matches[1];
00207                         }
00208                 }
00210                 if (is_array($subpartContent))  {
00211                         $between = $subpartContent[0].$between.$subpartContent[1];
00212                 } else  {
00213                         $between = $subpartContent;
00214                 }
00216                 return $before.$between.$after;
00217         }
00220         // *******************************************'
00221         // COPY FROM class.tslib_content.php: / END
00222         // *******************************************'
00230         /************************************
00231          *
00232          * Parsing HTML code
00233          *
00234          ************************************/
00247         function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0) {
00248                 $tags=array_unique(t3lib_div::trimExplode(',',$tag,1));
00249                 $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';
00251                 $parts = preg_split($regexStr, $content);
00253                 $newParts=array();
00254                 $pointer=strlen($parts[0]);
00255                 $buffer=$parts[0];
00256                 $nested=0;
00257                 reset($parts);
00258                 next($parts);
00259                 while(list($k,$v)=each($parts)) {
00260                         $isEndTag= substr($content,$pointer,2)=='</' ? 1 : 0;
00261                         $tagLen = strcspn(substr($content,$pointer),'>')+1;
00263                         if (!$isEndTag) {       // We meet a start-tag:
00264                                 if (!$nested)   {       // Ground level:
00265                                         $newParts[]=$buffer;    // previous buffer stored
00266                                         $buffer='';
00267                                 }
00268                                 $nested++;      // We are inside now!
00269                                 $mbuffer=substr($content,$pointer,strlen($v)+$tagLen);  // New buffer set and pointer increased
00270                                 $pointer+=strlen($mbuffer);
00271                                 $buffer.=$mbuffer;
00272                         } else {        // If we meet an endtag:
00273                                 $nested--;      // decrease nested-level
00274                                 $eliminated=0;
00275                                 if ($eliminateExtraEndTags && $nested<0)        {
00276                                         $nested=0;
00277                                         $eliminated=1;
00278                                 } else {
00279                                         $buffer.=substr($content,$pointer,$tagLen);     // In any case, add the endtag to current buffer and increase pointer
00280                                 }
00281                                 $pointer+=$tagLen;
00282                                 if (!$nested && !$eliminated)   {       // if we're back on ground level, (and not by eliminating tags...
00283                                         $newParts[]=$buffer;
00284                                         $buffer='';
00285                                 }
00286                                 $mbuffer=substr($content,$pointer,strlen($v));  // New buffer set and pointer increased
00287                                 $pointer+=strlen($mbuffer);
00288                                 $buffer.=$mbuffer;
00289                         }
00291                 }
00292                 $newParts[]=$buffer;
00293                 return $newParts;
00294         }
00308         function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)   {
00309                 $parts = $this->splitIntoBlock($tag,$content,TRUE);
00310                 foreach($parts as $k => $v)     {
00311                         if ($k%2)       {
00312                                 $firstTagName = $this->getFirstTagName($v, TRUE);
00313                                 $tagsArray = array();
00314                                 $tagsArray['tag_start'] = $this->getFirstTag($v);
00315                                 $tagsArray['tag_end'] = '</'.$firstTagName.'>';
00316                                 $tagsArray['tag_name'] = strtolower($firstTagName);
00317                                 $tagsArray['add_level'] = 1;
00318                                 $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag,$this->removeFirstAndLastTag($v),$procObj,$callBackContent,$callBackTags,$level+$tagsArray['add_level']);
00320                                 if ($callBackTags)      $tagsArray = $procObj->$callBackTags($tagsArray,$level);
00322                                 $parts[$k] =
00323                                         $tagsArray['tag_start'].
00324                                         $tagsArray['content'].
00325                                         $tagsArray['tag_end'];
00326                         } else {
00327                                 if ($callBackContent)   $parts[$k] = $procObj->$callBackContent($parts[$k],$level);
00328                         }
00329                 }
00331                 return implode('',$parts);
00332         }
00344         function splitTags($tag,$content)       {
00345                 $tags = t3lib_div::trimExplode(',',$tag,1);
00346                 $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
00347                 $parts = preg_split($regexStr, $content);
00349                 $pointer = strlen($parts[0]);
00350                 $newParts = array();
00351                 $newParts[] = $parts[0];
00352                 reset($parts);
00353                 next($parts);
00354                 while(list($k,$v)=each($parts)) {
00355                         $tagLen = strcspn(substr($content,$pointer),'>')+1;
00357                                 // Set tag:
00358                         $tag = substr($content,$pointer,$tagLen);       // New buffer set and pointer increased
00359                         $newParts[] = $tag;
00360                         $pointer+= strlen($tag);
00362                                 // Set content:
00363                         $newParts[] = $v;
00364                         $pointer+= strlen($v);
00365                 }
00366                 return $newParts;
00367         }
00378         function getAllParts($parts,$tag_parts=1,$include_tag=1)        {
00379                 $newParts=array();
00380                 foreach ($parts as $k => $v)    {
00381                         if (($k+($tag_parts?0:1))%2)    {
00382                                 if (!$include_tag)      $v=$this->removeFirstAndLastTag($v);
00383                                 $newParts[]=$v;
00384                         }
00385                 }
00386                 return $newParts;
00387         }
00396         function removeFirstAndLastTag($str)    {
00397                         // End of first tag:
00398                 $start = strpos($str,'>');
00399                         // Begin of last tag:
00400                 $end = strrpos($str,'<');
00401                         // return
00402                 return substr($str, $start+1, $end-$start-1);
00403         }
00412         function getFirstTag($str)      {
00413                         // First:
00414                 $endLen = strpos($str,'>')+1;
00415                 return substr($str,0,$endLen);
00416         }
00426         function getFirstTagName($str,$preserveCase=FALSE)      {
00427                 $matches = array();
00428                 if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $matches)===1)        {
00429                         if (!$preserveCase)     {
00430                                 return strtoupper($matches[1]);
00431                         }
00432                         return $matches[1];
00433                 }
00434                 return '';
00435         }
00445         function get_tag_attributes($tag,$deHSC=0)      {
00446                 list($components,$metaC) = $this->split_tag_attributes($tag);
00447                 $name = '';      // attribute name is stored here
00448                 $valuemode = false;
00449                 $attributes = array();
00450                 $attributesMeta = array();
00451                 if (is_array($components))      {
00452                         foreach ($components as $key => $val)   {
00453                                 if ($val != '=')        {       // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
00454                                         if ($valuemode) {
00455                                                 if ($name)      {
00456                                                         $attributes[$name] = $deHSC?t3lib_div::htmlspecialchars_decode($val):$val;
00457                                                         $attributesMeta[$name]['dashType']=$metaC[$key];
00458                                                         $name = '';
00459                                                 }
00460                                         } else {
00461                                                 if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val))     {
00462                                                         $name = strtolower($namekey);
00463                                                         $attributesMeta[$name]=array();
00464                                                         $attributesMeta[$name]['origTag']=$namekey;
00465                                                         $attributes[$name] = '';
00466                                                 }
00467                                         }
00468                                         $valuemode = false;
00469                                 } else {
00470                                         $valuemode = true;
00471                                 }
00472                         }
00473                         return array($attributes,$attributesMeta);
00474                 }
00475         }
00486         function split_tag_attributes($tag)     {
00487                 $matches = array();
00488                 if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $matches)!==1)  {
00489                         return array(array(), array());
00490                 }
00491                 $tag_tmp = $matches[2];
00493                 $metaValue = array();
00494                 $value = array();
00495                 $matches = array();
00496                 if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0)     {
00497                         foreach ($matches[1] as $part)  {
00498                                 $firstChar = substr($part, 0, 1);
00499                                 if ($firstChar=='"' || $firstChar=="'") {
00500                                         $metaValue[] = $firstChar;
00501                                         $value[] = substr($part, 1, -1);
00502                                 } else  {
00503                                         $metaValue[] = '';
00504                                         $value[] = $part;
00505                                 }
00506                         }
00507                 }
00508                 return array($value,$metaValue);
00509         }
00524         function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')       {
00525                 $content = strtolower($content);
00526                 $analyzedOutput=array();
00527                 $analyzedOutput['counts']=array();      // Counts appearances of start-tags
00528                 $analyzedOutput['errors']=array();      // Lists ERRORS
00529                 $analyzedOutput['warnings']=array();    // Lists warnings.
00530                 $analyzedOutput['blocks']=array();      // Lists stats for block-tags
00531                 $analyzedOutput['solo']=array();        // Lists stats for solo-tags
00533                         // Block tags, must have endings...
00534                 $blockTags = explode(',',$blockTags);
00535                 foreach($blockTags as $tagName) {
00536                         $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
00537                         $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
00538                         $analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
00539                         if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
00540                         if ($countBegin-$countEnd)      {
00541                                 if ($countBegin-$countEnd > 0)  {
00542                                         $analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
00543                                 } else {
00544                                         $analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
00545                                 }
00546                         }
00547                 }
00549                         // Solo tags, must NOT have endings...
00550                 $soloTags = explode(',',$soloTags);
00551                 foreach($soloTags as $tagName)  {
00552                         $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
00553                         $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
00554                         $analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
00555                         if ($countBegin)        $analyzedOutput['counts'][$tagName]=$countBegin;
00556                         if ($countEnd)  {
00557                                 $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
00558                         }
00559                 }
00561                 return $analyzedOutput;
00562         }
00575         /*********************************
00576          *
00577          * Clean HTML code
00578          *
00579          *********************************/
00617         function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())      {
00618                 $newContent = array();
00619                 $tokArr = explode('<',$content);
00620                 $newContent[] = $this->processContent(current($tokArr),$hSC,$addConfig);
00621                 next($tokArr);
00623                 $c = 1;
00624                 $tagRegister = array();
00625                 $tagStack = array();
00626                 while(list(,$tok)=each($tokArr))        {
00627                         $firstChar = substr($tok,0,1);
00628 #                       if (strcmp(trim($firstChar),''))        {               // It is a tag...
00629                         if (preg_match('/[[:alnum:]\/]/',$firstChar)==1)        {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
00630                                 $tagEnd = strpos($tok,'>');
00631                                 if ($tagEnd)    {       // If there is and end-bracket...       tagEnd can't be 0 as the first character can't be a >
00632                                         $endTag = $firstChar=='/' ? 1 : 0;
00633                                         $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
00634                                         $tagParts = preg_split('/\s+/s',$tagContent,2);
00635                                         $tagName = strtolower($tagParts[0]);
00636                                         if (isset($tags[$tagName]))     {
00637                                                 if (is_array($tags[$tagName]))  {       // If there is processing to do for the tag:
00639                                                         if (!$endTag)   {       // If NOT an endtag, do attribute processing (added dec. 2003)
00640                                                                         // Override attributes
00641                                                                 if (strcmp($tags[$tagName]['overrideAttribs'],''))      {
00642                                                                         $tagParts[1]=$tags[$tagName]['overrideAttribs'];
00643                                                                 }
00645                                                                         // Allowed tags
00646                                                                 if (strcmp($tags[$tagName]['allowedAttribs'],''))       {
00647                                                                         if (!strcmp($tags[$tagName]['allowedAttribs'],'0'))     {       // No attribs allowed
00648                                                                                 $tagParts[1]='';
00649                                                                         } elseif (trim($tagParts[1])) {
00650                                                                                 $tagAttrib = $this->get_tag_attributes($tagParts[1]);
00651                                                                                 $tagParts[1]='';
00652                                                                                 $newTagAttrib = array();
00653                                                                                 if (!($tList = $tags[$tagName]['_allowedAttribs']))     {
00654                                                                                                 // Just explode attribts for tag once
00655                                                                                         $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
00656                                                                                 }
00657                                                                                 foreach ($tList as $allowTag)   {
00658                                                                                         if (isset($tagAttrib[0][$allowTag]))    $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
00659                                                                                 }
00660                                                                                 $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
00661                                                                         }
00662                                                                 }
00664                                                                         // Fixed attrib values
00665                                                                 if (is_array($tags[$tagName]['fixAttrib']))     {
00666                                                                         $tagAttrib = $this->get_tag_attributes($tagParts[1]);
00667                                                                         $tagParts[1]='';
00668                                                                         reset($tags[$tagName]['fixAttrib']);
00669                                                                         while(list($attr,$params)=each($tags[$tagName]['fixAttrib']))   {
00670                                                                                 if (strlen($params['set']))     $tagAttrib[0][$attr] = $params['set'];
00671                                                                                 if (strlen($params['unset']))   unset($tagAttrib[0][$attr]);
00672                                                                                 if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))      $tagAttrib[0][$attr]=$params['default'];
00673                                                                                 if ($params['always'] || isset($tagAttrib[0][$attr]))   {
00674                                                                                         if ($params['trim'])    {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
00675                                                                                         if ($params['intval'])  {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
00676                                                                                         if ($params['lower'])   {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
00677                                                                                         if ($params['upper'])   {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
00678                                                                                         if ($params['range'])   {
00679                                                                                                 if (isset($params['range'][1])) {
00680                                                                                                         $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
00681                                                                                                 } else {
00682                                                                                                         $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
00683                                                                                                 }
00684                                                                                         }
00685                                                                                         if (is_array($params['list']))  {
00686                                                                                                 if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName)))     $tagAttrib[0][$attr]=$params['list'][0];
00687                                                                                         }
00688                                                                                         if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],'')))        {
00689                                                                                                 unset($tagAttrib[0][$attr]);
00690                                                                                         }
00691                                                                                         if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp'])))      {
00692                                                                                                 unset($tagAttrib[0][$attr]);
00693                                                                                         }
00694                                                                                         if ($params['prefixLocalAnchors'])      {
00695                                                                                                 if (substr($tagAttrib[0][$attr],0,1)=='#')      {
00696                                                                                                         $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
00697                                                                                                         $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
00698                                                                                                         if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL')))           {
00699                                                                                                                 $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
00700                                                                                                         }
00701                                                                                                 }
00702                                                                                         }
00703                                                                                         if ($params['prefixRelPathWith'])       {
00704                                                                                                 $urlParts = parse_url($tagAttrib[0][$attr]);
00705                                                                                                 if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {       // If it is NOT an absolute URL (by http: or starting "/")
00706                                                                                                         $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
00707                                                                                                 }
00708                                                                                         }
00709                                                                                         if ($params['userFunc'])        {
00710                                                                                                 $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
00711                                                                                         }
00712                                                                                 }
00713                                                                         }
00714                                                                         $tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
00715                                                                 }
00716                                                         } else {        // If endTag, remove any possible attributes:
00717                                                                 $tagParts[1]='';
00718                                                         }
00720                                                                 // Protecting the tag by converting < and > to &lt; and &gt; ??
00721                                                         if ($tags[$tagName]['protect']) {
00722                                                                 $lt = '&lt;';   $gt = '&gt;';
00723                                                         } else {
00724                                                                 $lt = '<';      $gt = '>';
00725                                                         }
00726                                                                 // Remapping tag name?
00727                                                         if ($tags[$tagName]['remap'])   $tagParts[0] = $tags[$tagName]['remap'];
00729                                                                 // rmTagIfNoAttrib
00730                                                         if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib'])       {
00731                                                                 $setTag=1;
00733                                                                 if ($tags[$tagName]['nesting']) {
00734                                                                         if (!is_array($tagRegister[$tagName]))  $tagRegister[$tagName]=array();
00736                                                                         if ($endTag)    {
00737 /*                                                                              if ($tags[$tagName]['nesting']=='global')       {
00738                                                                                         $lastEl = end($tagStack);
00739                                                                                         $correctTag = !strcmp($tagName,$lastEl);
00740                                                                                 } else $correctTag=1;
00741         */
00742                                                                                 $correctTag=1;
00743                                                                                 if ($tags[$tagName]['nesting']=='global')       {
00744                                                                                         $lastEl = end($tagStack);
00745                                                                                         if (strcmp($tagName,$lastEl))   {
00746                                                                                                 if (in_array($tagName,$tagStack))       {
00747                                                                                                         while(count($tagStack) && strcmp($tagName,$lastEl))     {
00748                                                                                                                 $elPos = end($tagRegister[$lastEl]);
00749                                                                                                                 unset($newContent[$elPos]);
00751                                                                                                                 array_pop($tagRegister[$lastEl]);
00752                                                                                                                 array_pop($tagStack);
00753                                                                                                                 $lastEl = end($tagStack);
00754                                                                                                         }
00755                                                                                                 } else {
00756                                                                                                         $correctTag=0;  // In this case the
00757                                                                                                 }
00758                                                                                         }
00759                                                                                 }
00760                                                                                 if (!count($tagRegister[$tagName]) || !$correctTag)     {
00761                                                                                         $setTag=0;
00762                                                                                 } else {
00763                                                                                         array_pop($tagRegister[$tagName]);
00764                                                                                         if ($tags[$tagName]['nesting']=='global')       {array_pop($tagStack);}
00765                                                                                 }
00766                                                                         } else {
00767                                                                                 array_push($tagRegister[$tagName],$c);
00768                                                                                 if ($tags[$tagName]['nesting']=='global')       {array_push($tagStack,$tagName);}
00769                                                                         }
00770                                                                 }
00772                                                                 if ($setTag)    {
00773                                                                                 // Setting the tag
00774                                                                         $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
00775                                                                 }
00776                                                         }
00777                                                 } else {
00778                                                         $newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
00779                                                 }
00780                                         } elseif ($keepAll) {   // This is if the tag was not defined in the array for processing:
00781                                                 if (!strcmp($keepAll,'protect'))        {
00782                                                         $lt = '&lt;';   $gt = '&gt;';
00783                                                 } else {
00784                                                         $lt = '<';      $gt = '>';
00785                                                 }
00786                                                 $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
00787                                         }
00788                                         $newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
00789                                 } else {
00790                                         $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // There were not end-bracket, so no tag...
00791                                 }
00792                         } else {
00793                                 $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // It was not a tag anyways
00794                         }
00795                 }
00797                         // Unsetting tags:
00798                 foreach ($tagRegister as $tag => $positions)    {
00799                         foreach ($positions as $pKey)   {
00800                                 unset($newContent[$pKey]);
00801                         }
00802                 }
00804                 return implode('',$newContent);
00805         }
/**
 * Converts special HTML characters forth ($dir = 1 or 2) or back ($dir = -1).
 *
 * @param	string		Input value
 * @param	integer		Direction: 1 = htmlspecialchars(); 2 = htmlspecialchars() with the result passed through t3lib_div::deHSCentities(); -1 = turn &gt; &lt; &quot; &amp; back into characters. Any other value leaves the input untouched.
 * @return	string		Converted value
 */
function bidir_htmlspecialchars($value,$dir)	{
	switch ($dir)	{
		case 1:
			return htmlspecialchars($value);
		case 2:
			return t3lib_div::deHSCentities(htmlspecialchars($value));
		case -1:
			// The pairs are applied one after the other; &amp; comes last so that
			// "&amp;lt;" ends up as "&lt;" and is not decoded twice.
			return str_replace(
				array('&gt;', '&lt;', '&quot;', '&amp;'),
				array('>', '<', '"', '&'),
				$value
			);
	}
	return $value;
}
/**
 * Prefixes relative resource references in $content: the background attribute of
 * td/body/table, the src attribute of img/input/script/embed, the href attribute of
 * link/a, the action attribute of form and url(...) values inside <style> blocks.
 *
 * @param	string		Default prefix
 * @param	string		HTML content
 * @param	array		Alternative prefixes. Tags are looked up by their name in UPPERCASE; the <style> section is looked up with the lowercase key "style".
 * @param	string		Suffix appended after every value that gets prefixed
 * @return	string		Processed HTML content
 * @see prefixRelPath()
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')	{
	// Which attribute holds the resource reference for each handled tag:
	$resourceAttribs = array(
		'td' => 'background',
		'body' => 'background',
		'table' => 'background',
		'img' => 'src',
		'input' => 'src',
		'script' => 'src',
		'embed' => 'src',
		'link' => 'href',
		'a' => 'href',
		'form' => 'action'
	);

	$parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
	foreach ($parts as $k => $v)	{
		if ($k%2)	{	// Odd keys hold the tags
			$params = $this->get_tag_attributes($v,1);
			$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';	// Detect tag-ending so that it is re-applied correctly.
			$firstTagName = $this->getFirstTagName($v);
			$tagKey = strtolower($firstTagName);
			$prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;

			if (isset($resourceAttribs[$tagKey]))	{
				$attrib = $resourceAttribs[$tagKey];
				// The tag is only re-compiled when the attribute is present and non-empty
				if (!empty($params[0][$attrib]))	{
					$params[0][$attrib] = $this->prefixRelPath($prefix,$params[0][$attrib],$suffix);
					$meta = isset($params[1]) ? $params[1] : array();
					$parts[$k] = '<'.trim($tagKey.' '.$this->compileTagAttribs($params[0],$meta)).$tagEnd;
				}
			}
		}
	}
	$content = implode('',$parts);

	// Fix <style> section:
	$prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
	if (strlen($prefix))	{
		// Escape "\" and "$" so that prefix/suffix are inserted literally and are
		// never interpreted as back-references in the replacement string.
		$replacement = '${1}'.addcslashes($prefix,'\\$').'${2}'.addcslashes($suffix,'\\$').'${3}';
		$parts = $this->splitIntoBlock('style',$content);
		foreach ($parts as $k => $v)	{
			if ($k%2)	{
				// NOTE: every url(...) value is prefixed here, absolute ones included.
				$fixed = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$v);
				if ($fixed!==NULL)	{	// NULL signals a PCRE error; keep the block untouched then
					$parts[$k] = $fixed;
				}
			}
		}
		$content = implode('',$parts);
	}

	return $content;
}
/**
 * Prefixes (and suffixes) $srcVal unless it is an absolute reference, i.e. unless
 * it carries a URL scheme or starts with "/".
 *
 * @param	string		Prefix
 * @param	string		Path/URL to be prefixed
 * @param	string		Suffix, only appended when the prefix is applied
 * @return	string		Processed value
 */
function prefixRelPath($prefix,$srcVal,$suffix='')	{
	// parse_url() returns FALSE for seriously malformed input and omits "scheme"
	// for relative references; empty() covers both cases without raising warnings.
	$pU = parse_url($srcVal);
	$hasScheme = !empty($pU['scheme']);
	if (!$hasScheme && substr($srcVal,0,1)!='/')	{	// If not an absolute URL.
		$srcVal = $prefix.$srcVal.$suffix;
	}
	return $srcVal;
}
/**
 * Reduces <font> tags to the attributes that are allowed to stay. A <font> tag
 * left without any attribute is removed and only its (recursively cleaned)
 * content remains.
 *
 * @param	string		Input content
 * @param	boolean		If set, the "face" attribute is kept
 * @param	boolean		If set, the "size" attribute is kept
 * @param	boolean		If set, the "color" attribute is kept
 * @return	string		Processed content
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)	{
	$keepFlags = array(
		'face' => $keepFace,
		'size' => $keepSize,
		'color' => $keepColor
	);

	// Depends on font end-tags being properly set, since blocks (not single tags) are split out
	$blocks = $this->splitIntoBlock('font',$value);
	foreach ($blocks as $idx => $block)	{
		if (!($idx%2))	{ continue; }	// Even keys hold the content outside font blocks

		$attribArray = $this->get_tag_attributes_classic($this->getFirstTag($block));
		$kept = array();
		foreach ($keepFlags as $attribName => $keep)	{
			if ($keep && $attribArray[$attribName])	{
				$kept[] = $attribName.'="'.$attribArray[$attribName].'"';
			}
		}

		$inner = $this->cleanFontTags($this->removeFirstAndLastTag($block),$keepFace,$keepSize,$keepColor);
		$blocks[$idx] = count($kept) ? '<font '.implode(' ',$kept).'>'.$inner.'</font>' : $inner;
	}
	return implode('',$blocks);
}
/**
 * Renames tags: every start and end tag named like a key of $tags gets the name
 * given as the value for that key. Attributes are carried over unchanged.
 * Matching is case-sensitive and tag names are matched literally.
 *
 * @param	string		Content to search through
 * @param	array		Tag name pairs, old name => new name
 * @param	string		Character(s) that open a tag in the input, normally "<"
 * @param	string		Character(s) written as tag opener in the output, normally "<"
 * @return	string		Processed content
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')	{
	$opener = preg_quote($ltChar,'/');
	// "\" and "$" must be escaped to be taken literally in a replacement string
	$newOpener = addcslashes($ltChar2,'\\$');

	foreach ($tags as $from => $to)	{
		// (1) optional "/" of an end tag
		// (2) optional attribute part: it has to start with whitespace, which keeps
		//     "b" from matching "<br>"; a directly attached "/" (as in "<br/>") is accepted too
		$pattern = '/'.$opener.'(\/?)'.preg_quote($from,'/').'((?:\s[^>]*)?\/?)>/';
		// ${1} instead of $1: a new name starting with a digit would otherwise be
		// read as part of the back-reference number.
		$replacement = $newOpener.'${1}'.addcslashes($to,'\\$').'${2}>';

		$mapped = preg_replace($pattern,$replacement,$value);
		if ($mapped!==NULL)	{	// NULL signals a PCRE error; keep the value untouched then
			$value = $mapped;
		}
	}
	return $value;
}
/**
 * Converts protected tags (written as &lt;tag&gt;) back into real tags.
 *
 * @param	string		Content containing protected tags
 * @param	string		Comma list of tag names to unprotect. If empty, every protected tag is converted.
 * @return	string		Processed content
 */
function unprotectTags($content,$tagList='')	{
	$tagsArray = t3lib_div::trimExplode(',',$tagList,1);
	$contentParts = explode('&lt;',$content);

	foreach ($contentParts as $k => $tok)	{
		if ($k==0)	{ continue; }	// The first part is the text before the first "&lt;"

		// Default: put the "&lt;" removed by explode() back unchanged
		$restored = '&lt;'.$tok;

		$firstChar = substr($tok,0,1);
		if (strcmp(trim($firstChar),''))	{	// A tag name (or "/") must follow immediately
			$subparts = explode('&gt;',$tok,2);
			$tagEnd = strlen($subparts[0]);
			if (strlen($tok)!=$tagEnd)	{	// A closing "&gt;" was found
				$endTag = $firstChar=='/' ? 1 : 0;
				$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
				$tagParts = preg_split('/\s+/s',$tagContent,2);
				$tagName = strtolower($tagParts[0]);
				if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))	{
					$restored = '<'.$subparts[0].'>'.$subparts[1];
				}
			}
		}
		$contentParts[$k] = $restored;
	}

	return implode('',$contentParts);
}
/**
 * Strips tags from $value except those listed in $tagList. The tags to keep are
 * renamed to placeholders opened with "_" instead of "<" (so that strip_tags()
 * does not see them as tags) and are mapped back afterwards.
 *
 * @param	string		Content to strip tags from
 * @param	string		Comma list of tag names to keep
 * @return	string		Processed content
 * @see mapTags()
 */
function stripTagsExcept($value,$tagList)	{
	$tags = t3lib_div::trimExplode(',',$tagList,1);
	$forthArr = array();
	$backArr = array();
	foreach ($tags as $theTag)	{
		// The placeholder starts with a letter on purpose: mapTags() puts it directly
		// after a back-reference in its replacement string, where a leading digit of a
		// raw md5 hash would be read as part of the reference number.
		$placeholder = 't3tag'.md5($theTag);
		$forthArr[$theTag] = $placeholder;
		$backArr[$placeholder] = $theTag;
	}
	$value = $this->mapTags($value,$forthArr,'<','_');
	$value = strip_tags($value);
	$value = $this->mapTags($value,$backArr,'_','<');
	return $value;
}
/**
 * Shifts a string, or every value of an array, to upper case unless $flag is set.
 *
 * @param	mixed		String or array of strings
 * @param	boolean		If set, the input is returned with its case untouched
 * @param	string		Cache key for array input. The result is stored under this key (combined with $flag) and returned again for any later call using the same key, whatever array that call passes. Leave empty to disable caching.
 * @return	mixed		Processed string or array
 */
function caseShift($str,$flag,$cacheKey='')	{
	// Decide about caching BEFORE the flag is appended: an empty key combined with
	// a true flag would otherwise become the non-empty key "1" and make unrelated
	// arrays share one cache entry.
	$useCache = ((string)$cacheKey!=='');
	$cacheKey .= $flag ? 1 : 0;

	if (is_array($str))	{
		if ($useCache && isset($this->caseShift_cache[$cacheKey]))	{
			return $this->caseShift_cache[$cacheKey];
		}
		if (!$flag)	{
			foreach ($str as $k => $v)	{
				$str[$k] = strtoupper($v);
			}
		}
		if ($useCache)	{
			$this->caseShift_cache[$cacheKey] = $str;
		}
	} elseif (!$flag)	{
		$str = strtoupper($str);
	}
	return $str;
}
/**
 * Compiles an attribute array into the attribute string of a tag.
 *
 * @param	array		Attributes, name => value
 * @param	array		Meta information per attribute name: "origTag" (name as it is to be written) and "dashType" (quote character to wrap the value in). Only used when $xhtmlClean is not set, except that a set "dashType" always forces the value part to be written.
 * @param	boolean		If set, names are lowercased and values are passed through htmlspecialchars() and wrapped in double quotes
 * @return	string		Attributes joined by single spaces
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)	{
	$compiled = array();
	foreach ($tagAttrib as $name => $val)	{
		// An empty value is written as a bare attribute name unless a dashType is registered for it
		$withValue = strcmp($val,'') || isset($meta[$name]['dashType']);

		if ($xhtmlClean)	{
			$piece = strtolower($name);
			if ($withValue)	{
				$piece .= '="'.htmlspecialchars($val).'"';
			}
		} else {
			$piece = $meta[$name]['origTag'] ? $meta[$name]['origTag'] : $name;
			if ($withValue)	{
				if ($meta[$name]['dashType'])	{
					$quote = $meta[$name]['dashType'];
				} else {
					// Without a registered quote character: values accepted by testInt() stay unquoted
					$quote = t3lib_div::testInt($val) ? '' : '"';
				}
				$piece .= '='.$quote.$val.$quote;
			}
		}
		$compiled[] = $piece;
	}
	return implode(' ',$compiled);
}
/**
 * Wrapper for get_tag_attributes() which returns only the attribute array
 * (index 0 of its result) and leaves out everything else.
 *
 * @param	string		Tag: $tag is passed on to get_tag_attributes()
 * @param	boolean		Passed on to get_tag_attributes() as its second argument
 * @return	array		The attribute array, or an empty array if index 0 of the result is not an array
 * @see get_tag_attributes()
 */
function get_tag_attributes_classic($tag,$deHSC=0)	{
	$parsed = $this->get_tag_attributes($tag,$deHSC);
	if (is_array($parsed[0]))	{
		return $parsed[0];
	}
	return array();
}
/**
 * Indents every line of $content. Carriage returns are removed, so the result
 * uses plain line feeds as line breaks.
 *
 * @param	string		Content to indent
 * @param	integer		Number of indent strings to put in front of each line
 * @param	string		Indent string, by default a tab
 * @return	string		Indented content
 */
function indentLines($content, $number=1, $indentChar="\t")	{
	$indent = str_pad('', $number*strlen($indentChar), $indentChar);
	$rows = explode("\n", str_replace("\r", '', $content));

	// explode() always yields at least one row, so prefixing the first row and
	// gluing the others with "line feed + indent" indents every single row.
	return $indent.implode("\n".$indent, $rows);
}
/**
 * Converts a TSconfig array with HTMLparser settings into a set of four values,
 * ordered like the arguments XHTML_clean() passes to HTMLcleaner() after the content.
 *
 * TSconfig keys read: allowTags, tags., localNesting, globalNesting, rmTagIfNoAttrib,
 * noAttrib, removeTags, xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars.
 *
 * @param	array		TSconfig for the HTML parser
 * @param	array		Tags to keep (tag name => configuration); merged on top of the "allowTags" list
 * @return	array		[0] tag configuration array, [1] "keepNonMatchedTags" as string, [2] "htmlSpecialChars" as integer, [3] additional configuration ('xhtml' => 1 if "xhtml_cleaning" is set)
 */
function HTMLparserConfig($TSconfig,$keepTags=array())	{
	// Allow tags (base list, merged with incoming array)
	$allowTags = isset($TSconfig['allowTags']) ? strtolower($TSconfig['allowTags']) : '';
	$alTags = array_flip(t3lib_div::trimExplode(',',$allowTags,1));
	$keepTags = array_merge($alTags,$keepTags);

	// Set config properties.
	if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.']))	{
		// First pass: scalar entries switch single tags off ("0") or on ("1")
		foreach ($TSconfig['tags.'] as $key => $tagC)	{
			if (!is_array($tagC) && $key==strtolower($key))	{
				if (!strcmp($tagC,'0'))	unset($keepTags[$key]);
				if (!strcmp($tagC,'1') && !isset($keepTags[$key]))	$keepTags[$key]=1;
			}
		}

		// Second pass: array entries (keys with a trailing dot) hold per-tag configuration
		foreach ($TSconfig['tags.'] as $key => $tagC)	{
			if (is_array($tagC) && $key==strtolower($key))	{
				$key = substr($key,0,-1);	// Remove the trailing dot
				if (!isset($keepTags[$key]) || !is_array($keepTags[$key]))	$keepTags[$key]=array();

				if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.']))	{
					foreach ($tagC['fixAttrib.'] as $atName => $atConfig)	{
						if (is_array($atConfig))	{
							$atName = substr($atName,0,-1);	// Remove the trailing dot
							$fix = (isset($keepTags[$key]['fixAttrib'][$atName]) && is_array($keepTags[$key]['fixAttrib'][$atName]))
								? $keepTags[$key]['fixAttrib'][$atName]
								: array();
							$fix = array_merge($fix,$atConfig);		// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...

							// "range" and "list" arrive as comma lists and are stored as arrays
							foreach (array('range','list') as $listKey)	{
								if (isset($fix[$listKey]) && !is_array($fix[$listKey]) && strcmp($fix[$listKey],''))	{
									$fix[$listKey] = t3lib_div::trimExplode(',',$fix[$listKey]);
								}
							}
							$keepTags[$key]['fixAttrib'][$atName] = $fix;
						}
					}
				}
				unset($tagC['fixAttrib.']);
				unset($tagC['fixAttrib']);
				$keepTags[$key] = array_merge($keepTags[$key],$tagC);			// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
			}
		}
	}

	// Comma list options which set one property on every listed tag that is already
	// in $keepTags. The order matters: "globalNesting" overrides "localNesting".
	$tagListOptions = array(
		'localNesting' => array('nesting', 1),
		'globalNesting' => array('nesting', 'global'),
		'rmTagIfNoAttrib' => array('rmTagIfNoAttrib', 1),
		'noAttrib' => array('allowedAttribs', 0)
	);
	foreach ($tagListOptions as $option => $setting)	{
		if (!empty($TSconfig[$option]))	{
			$tagNames = t3lib_div::trimExplode(',',strtolower($TSconfig[$option]),1);
			foreach ($tagNames as $tn)	{
				if (isset($keepTags[$tn]))	{
					// Tags coming from "allowTags" or set to "1" hold a scalar here;
					// turn it into an array before a property is set on it.
					if (!is_array($keepTags[$tn]))	$keepTags[$tn]=array();
					$keepTags[$tn][$setting[0]] = $setting[1];
				}
			}
		}
	}

	// removeTags: the listed tags get a fresh configuration in any case
	if (!empty($TSconfig['removeTags']))	{
		$tagNames = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1);
		foreach ($tagNames as $tn)	{
			$keepTags[$tn] = array(
				'allowedAttribs' => 0,
				'rmTagIfNoAttrib' => 1
			);
		}
	}

	// Create additional configuration:
	$addConfig = array();
	if (!empty($TSconfig['xhtml_cleaning']))	{
		$addConfig['xhtml'] = 1;
	}

	return array(
		$keepTags,
		''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
		intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
		$addConfig
	);
}
/**
 * Runs $content through HTMLcleaner() with the "xhtml" option set, keeping all tags.
 *
 * @param	string		Content to clean up
 * @return	string		Cleaned up content as returned by HTMLcleaner()
 * @see HTMLcleaner(), processTag()
 */
function XHTML_clean($content)	{
	$tags = array();	// No tags treated specially
	$keepAll = 1;		// Keep ALL tags.
	$hSC = 0;		// 0 = processContent() leaves the content between the tags unchanged; converting it would break <script> content
	$addConfig = array('xhtml' => 1);

	return $this->HTMLcleaner($content,$tags,$keepAll,$hSC,$addConfig);
}
/**
 * Processes a single tag. Without the "xhtml" option the tag is returned as it is.
 * With it, end tags are lowercased and start tags are rebuilt: lowercase tag name,
 * all attribute values in double quotes, and a closing " />" for standalone tags.
 * STILL VERY EXPERIMENTAL!!
 *
 * @param	string		The complete tag, including its < and >
 * @param	array		Configuration; only the key "xhtml" is evaluated
 * @param	boolean		Set if $value is an end tag
 * @param	boolean		Set if the tag is protected (written with &lt; and &gt;); it is returned unchanged then
 * @return	string		Processed tag
 */
function processTag($value,$conf,$endTag,$protected=0)	{
	// Return immediately if protected or no parameters
	if ($protected || !count($conf))	return $value;
	if (empty($conf['xhtml']))	return $value;

	// Endtags are just set lowercase right away
	if ($endTag)	{
		return strtolower($value);
	}
	// ... and comments are ignored.
	if (substr($value,0,4)=='<!--')	{
		return $value;
	}

	$selfClosed = (substr($value,-2)=='/>');
	$inValue = substr($value,1,($selfClosed ? -2 : -1));	// Finding inner value without < >

	// Separate attributes and tagname; a tag without attributes yields one element only
	$tagParts = preg_split('/\s+/s',$inValue,2);
	$tagName = strtolower($tagParts[0]);
	$tagP = isset($tagParts[1]) ? $tagParts[1] : '';

	// Process attributes
	$tagAttrib = $this->get_tag_attributes($tagP);
	$attribs = (is_array($tagAttrib) && isset($tagAttrib[0]) && is_array($tagAttrib[0])) ? $tagAttrib[0] : array();
	if (!strcmp($tagName,'img') && !isset($attribs['alt']))	$attribs['alt']='';	// Set alt attribute for all images (not XHTML though...)
	if (!strcmp($tagName,'script') && !isset($attribs['type']))	$attribs['type']='text/javascript';	// Set type attribute for all script-tags

	$outA = array();
	foreach ($attribs as $attrib_name => $attrib_value)	{
		// Set attributes: always in quotes, with htmlspecialchars converted.
		$outA[] = $attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
	}
	$newTag = '<'.trim($tagName.' '.implode(' ',$outA));

	// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
	if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed)	{
		$newTag .= ' />';
	} else {
		$newTag .= '>';
	}
	return $newTag;
}
/**
 * Processes the content between tags: it is passed through
 * bidir_htmlspecialchars() unless $dir is 0.
 *
 * @param	string		The content
 * @param	integer		Direction, see bidir_htmlspecialchars(). 0 returns the content unchanged.
 * @param	mixed		Not used
 * @return	string		Processed content
 * @see bidir_htmlspecialchars()
 */
function processContent($value,$dir,$conf)	{
	if ($dir==0)	{
		return $value;
	}
	return $this->bidir_htmlspecialchars($value,$dir);
}
01319 }
// Extension hook: when TYPO3_MODE is defined and an XCLASS file is registered
// for this script in $TYPO3_CONF_VARS, that file is included (once).
if (defined('TYPO3_MODE'))	{
	if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])	{
		include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
	}
}
01326 ?>

