Documentation TYPO3 par Ameos

class.t3lib_parsehtml.php

00001 <?php
00002 /***************************************************************
00003 *  Copyright notice
00004 *
00005 *  (c) 1999-2005 Kasper Skaarhoj (kasperYYYY@typo3.com)
00006 *  All rights reserved
00007 *
00008 *  This script is part of the TYPO3 project. The TYPO3 project is
00009 *  free software; you can redistribute it and/or modify
00010 *  it under the terms of the GNU General Public License as published by
00011 *  the Free Software Foundation; either version 2 of the License, or
00012 *  (at your option) any later version.
00013 *
00014 *  The GNU General Public License can be found at
00015 *  http://www.gnu.org/copyleft/gpl.html.
00016 *  A copy is found in the textfile GPL.txt and important notices to the license
00017 *  from the author is found in LICENSE.txt distributed with these scripts.
00018 *
00019 *
00020 *  This script is distributed in the hope that it will be useful,
00021 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00022 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00023 *  GNU General Public License for more details.
00024 *
00025 *  This copyright notice MUST APPEAR in all copies of the script!
00026 ***************************************************************/
00106 class t3lib_parsehtml {
00107         var $caseShift_cache=array();
00108 
00109 
00110         // *******************************************'
00111         // COPY FROM class.tslib_content.php: / BEGIN
00112         // substituteSubpart
00113         // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
00114         // *******************************************'
00115 
/**
 * Returns the first subpart of $content, i.e. the text found between the first two
 * occurrences of $marker.
 * If the markers are wrapped in HTML comments (eg. "<!-- ###MARKER### begin -->"),
 * the remainder of the opening comment and the start of the closing comment are
 * stripped from the returned subpart.
 *
 * @param	string		Content with the subpart wrapped in fx. "###CONTENT_PART###" inside.
 * @param	string		Marker string, eg. "###CONTENT_PART###"
 * @return	string		The subpart content; empty string if $marker is empty or not found twice.
 */
function getSubpart($content, $marker)	{
	if (!$marker)	return '';

	$start = strpos($content, $marker);
	if ($start === FALSE)	return '';
	$start+= strlen($marker);

		// Search from right after the first marker, so that an empty subpart (two adjacent markers) is found as well:
	$stop = strpos($content, $marker, $start);
	if ($stop === FALSE)	return '';	// No end marker: there is no subpart to return.

	$sub = substr($content, $start, $stop-$start);
	$from = 0;
	$to = strlen($sub);

		// Strip the rest of the HTML comment which wraps the start marker (no "<" allowed before the "-->"):
	$reg = array();
	if (preg_match('/^[^<]*-->/', $sub, $reg))	$from = strlen($reg[0]);

		// Strip the beginning of the HTML comment which wraps the end marker (\z = very end of string, like POSIX "$"):
	$reg = array();
	if (preg_match('/<!--[^>]*\z/', $sub, $reg))	$to-= strlen($reg[0]);

	return (string)substr($sub, $from, $to-$from);
}
00140 
/**
 * Substitutes a subpart in $content with the content of $subpartContent.
 * A subpart is the text between two occurrences of $marker; HTML comments wrapping
 * the markers are regarded as part of the subpart boundaries and are replaced too
 * (unless $keepMarker is set).
 *
 * @param	string		Content with the subpart wrapped in fx. "###CONTENT_PART###" inside.
 * @param	string		Marker string, eg. "###CONTENT_PART###"
 * @param	mixed		If string, the new subpart content. If array, [0] and [1] are wrapped around the existing subpart content.
 * @param	boolean		If set, further occurrences of the subpart in the remaining content are substituted as well.
 * @param	boolean		If set, the markers (including wrapping HTML comments) are kept around the substituted content.
 * @return	string		Processed content; $content is returned unchanged if the marker is not found twice.
 */
function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)	{
	if (!strlen((string)$marker))	return $content;

		// Locate both markers explicitly. Testing "=== FALSE" lets a marker at offset 0 work
		// and prevents a single marker from being mistaken for a start/stop pair.
	$start = strpos($content, $marker);
	if ($start === FALSE)	return $content;
	$stop = strpos($content, $marker, $start+strlen($marker));
	if ($stop === FALSE)	return $content;
	$stop+= strlen($marker);

	$before_marker = '';
	$after_marker = '';

		// Code before: Extend the start backwards over an HTML comment opening which wraps the start marker
	$reg = array();
	$hasOpeningComment = preg_match('/<!--[^>]*\z/', (string)substr($content, 0, $start), $reg);
	if ($hasOpeningComment)	$start-= strlen($reg[0]);
	if ($keepMarker)	{
		$reg_k = array();
		if ($hasOpeningComment && preg_match('/^[^>]*-->/', (string)substr($content, $start), $reg_k))	{
			$before_marker = $reg_k[0];	// The complete comment holding the start marker
		} else {
			$before_marker = substr($content, $start, strlen($marker));
		}
	}

		// Code after: Extend the stop forwards over the rest of an HTML comment which wraps the end marker
	$reg = array();
	$hasClosingComment = preg_match('/^[^<]*-->/', (string)substr($content, $stop), $reg);
	if ($hasClosingComment)	$stop+= strlen($reg[0]);
	if ($keepMarker)	{
		$reg_k = array();
		if ($hasClosingComment && preg_match('/<!--[^<]*\z/', (string)substr($content, 0, $stop), $reg_k))	{
			$sLen = strlen($reg_k[0]);	// Length of the complete comment holding the end marker
		} else {
			$sLen = strlen($marker);
		}
		$after_marker = substr($content, $stop-$sLen, $sLen);
	}

	$before = (string)substr($content, 0, $start);
	$after = (string)substr($content, $stop);

		// Replace or wrap?
	if (is_array($subpartContent))	{
		$substContent = $subpartContent[0].$this->getSubpart($content,$marker).$subpartContent[1];
	} else {
		$substContent = $subpartContent;
	}

		// Process the remaining content with the same settings (including $keepMarker):
	if ($recursive && strpos($after, $marker) !== FALSE)	{
		$after = $this->substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker);
	}

	return $before.$before_marker.$substContent.$after_marker.$after;
}
00196         // *******************************************'
00197         // COPY FROM class.tslib_content.php: / END
00198         // *******************************************'
00199 
00200 
00201 
00202 
00203 
00204 
00205 
00206         /************************************
00207          *
00208          * Parsing HTML code
00209          *
00210          ************************************/
00211 
/**
 * Splits $content into an array where the elements with odd keys are the blocks
 * wrapped in one of the tags listed in $tag (including nested occurrences of those
 * tags), and the elements with even keys are the content outside such blocks.
 * Tag names are matched case-insensitively. They are inserted into the regular
 * expression unescaped.
 *
 * @param	string		Comma list of tag names to split by, eg. "table,div"
 * @param	string		HTML content
 * @param	boolean		If set, end tags without a matching start tag are removed from the output.
 * @return	array		Even keys: content outside the blocks. Odd keys: the blocks including their start/end tags.
 */
function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)	{
	$tags = array_unique(t3lib_div::trimExplode(',',$tag,1));
	$regexStr = '/<\/?('.implode('|',$tags).')(>|[[:space:]][^>]*>)/i';

	$parts = preg_split($regexStr,$content);
	if (!is_array($parts))	return array($content);	// Pattern could not be applied; treat everything as outside content.

	$newParts = array();
	$pointer = strlen($parts[0]);	// Offset in $content of the tag which follows the current part
	$buffer = $parts[0];
	$nested = 0;
	$partCount = count($parts);
	for ($k=1; $k<$partCount; $k++)	{
		$v = $parts[$k];	// The content following the tag at $pointer
		$isEndTag = substr($content,$pointer,2)=='</' ? 1 : 0;
		$tagLen = strcspn(substr($content,$pointer),'>')+1;

		if (!$isEndTag)	{	// We meet a start-tag:
			if (!$nested)	{	// Ground level:
				$newParts[] = $buffer;	// previous buffer stored
				$buffer = '';
			}
			$nested++;	// We are inside now!
			$mbuffer = substr($content,$pointer,strlen($v)+$tagLen);	// Tag plus following content
			$pointer+= strlen($mbuffer);
			$buffer.= $mbuffer;
		} else {	// If we meet an endtag:
			$nested--;	// decrease nested-level
			$eliminated = 0;
			if ($eliminateExtraEndTags && $nested<0)	{
				$nested = 0;
				$eliminated = 1;
			} else {
				$buffer.= substr($content,$pointer,$tagLen);	// Add the endtag to the current buffer
			}
			$pointer+= $tagLen;
			if (!$nested && !$eliminated)	{	// Back on ground level (and not by eliminating a tag): block is complete
				$newParts[] = $buffer;
				$buffer = '';
			}
			$mbuffer = substr($content,$pointer,strlen($v));	// Content following the endtag
			$pointer+= strlen($mbuffer);
			$buffer.= $mbuffer;
		}
	}
	$newParts[] = $buffer;
	return $newParts;
}
00271 
/**
 * Splits $content into blocks by the tags in $tag (see splitIntoBlock(), called with
 * elimination of extra end tags) and processes the result recursively through callbacks.
 * Content outside the blocks is passed to $procObj->$callBackContent (if set). For each
 * block an array with the keys "tag_start", "tag_end", "tag_name", "add_level" and
 * "content" is passed to $procObj->$callBackTags (if set), after the inner content of
 * the block has been processed one level deeper.
 *
 * @param	string		Comma list of tag names to split by
 * @param	string		HTML content
 * @param	object		Object holding the callback methods (passed by reference)
 * @param	string		Name of the method on $procObj processing content outside blocks; receives (content, level)
 * @param	string		Name of the method on $procObj processing the tag array of a block; receives (array, level)
 * @param	integer		Current nesting level
 * @return	string		Processed content
 */
function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)	{
	$output = '';
	foreach ($this->splitIntoBlock($tag,$content,TRUE) as $index => $segment)	{
			// Even keys: content outside the blocks
		if ($index%2 == 0)	{
			if ($callBackContent)	$segment = $procObj->$callBackContent($segment,$level);
			$output.= $segment;
			continue;
		}

			// Odd keys: a block wrapped in one of the tags
		$rawTagName = $this->getFirstTagName($segment, TRUE);
		$startTag = $this->getFirstTag($segment);
		$addLevel = 1;
		$innerContent = $this->splitIntoBlockRecursiveProc($tag,$this->removeFirstAndLastTag($segment),$procObj,$callBackContent,$callBackTags,$level+$addLevel);

		$tagInfo = array(
			'tag_start' => $startTag,
			'tag_end' => '</'.$rawTagName.'>',
			'tag_name' => strtolower($rawTagName),
			'add_level' => $addLevel,
			'content' => $innerContent
		);
		if ($callBackTags)	$tagInfo = $procObj->$callBackTags($tagInfo,$level);

		$output.= $tagInfo['tag_start'].$tagInfo['content'].$tagInfo['tag_end'];
	}

	return $output;
}
00309 
/**
 * Splits $content by the start tags listed in $tag. Unlike splitIntoBlock() no end
 * tags are considered, so this is meant for single tags.
 * Tag names are matched case-insensitively. They are inserted into the regular
 * expression unescaped.
 *
 * @param	string		Comma list of tag names to split by, eg. "img,br"
 * @param	string		HTML content
 * @return	array		Even keys: content outside the tags. Odd keys: the tags themselves.
 */
function splitTags($tag,$content)	{
	$tags = t3lib_div::trimExplode(',',$tag,1);
	$regexStr = '/<('.implode('|',$tags).')(>|\/>|[[:space:]][^>]*>)/i';
	$parts = preg_split($regexStr,$content);
	if (!is_array($parts))	return array($content);	// Pattern could not be applied; treat everything as outside content.

	$pointer = strlen($parts[0]);	// Offset in $content of the tag which follows the current part
	$newParts = array();
	$newParts[] = $parts[0];
	$partCount = count($parts);
	for ($k=1; $k<$partCount; $k++)	{
		$v = $parts[$k];
		$tagLen = strcspn(substr($content,$pointer),'>')+1;

			// Set tag (read from the original content, since the split removed it):
		$tagStr = substr($content,$pointer,$tagLen);
		$newParts[] = $tagStr;
		$pointer+= strlen($tagStr);

			// Set content:
		$newParts[] = $v;
		$pointer+= strlen($v);
	}
	return $newParts;
}
00344 
/**
 * Returns every second element of $parts: those with odd keys if $tag_parts is set,
 * otherwise those with even keys. With the output of splitIntoBlock()/splitTags()
 * as input, the odd keys are the tag parts and the even keys the content in between.
 *
 * @param	array		Array with alternating non-tag/tag elements, numerically indexed
 * @param	boolean		If set, the elements with odd keys are returned, otherwise those with even keys.
 * @param	boolean		If not set, each returned element is passed through removeFirstAndLastTag()
 * @return	array		The selected elements, re-indexed from 0
 */
function getAllParts($parts,$tag_parts=1,$include_tag=1)	{
	$newParts = array();
	if (!is_array($parts))	return $newParts;

	foreach ($parts as $k => $v)	{
		if (($k+($tag_parts?0:1))%2)	{
			if (!$include_tag)	$v = $this->removeFirstAndLastTag($v);
			$newParts[] = $v;
		}
	}
	return $newParts;
}
00365 
/**
 * Removes everything up to and including the first ">" and everything from the
 * last "<" to the end of the string, i.e. strips the first and last tag of a block.
 *
 * @param	string		String wrapped in a start and an end tag
 * @return	string		The content in between; empty string if either bracket is missing.
 */
function removeFirstAndLastTag($str)	{
		// First: cut off everything through the first ">"
	$firstTagEnd = strpos($str,'>');
	if ($firstTagEnd === FALSE)	return '';
	$inner = (string)substr($str,$firstTagEnd+1);

		// Last: cut off everything from the last "<"
	$lastTagStart = strrpos($inner,'<');
	if ($lastTagStart === FALSE)	return '';

	return substr($inner,0,$lastTagStart);
}
00384 
/**
 * Returns the beginning of $str up to and including the first ">".
 * If $str contains no ">", the whole string is returned.
 *
 * @param	string		HTML string which is expected to start with a tag
 * @return	string		The leading part of $str through the first ">"
 */
function getFirstTag($str)	{
	return substr($str,0,strcspn($str,'>')+1);
}
00398 
/**
 * Returns the name of the first tag in $str: the text between the first character
 * and the last character of the trimmed first tag (see getFirstTag()), cut at the
 * first whitespace character.
 *
 * @param	string		HTML string which is expected to start with a tag
 * @param	boolean		If set, the case of the tag name is preserved, otherwise it is uppercased.
 * @return	string		Tag name
 */
function getFirstTagName($str,$preserveCase=FALSE)	{
		// Strip the leading "<" and trailing ">" of the first tag:
	$tagBody = (string)substr(trim($this->getFirstTag($str)),1,-1);
	list($tag) = preg_split('/[[:space:]]/',$tagBody,2);
	if (!$preserveCase)	$tag = strtoupper($tag);

	return trim($tag);
}
00413 
/**
 * Parses the attributes of a tag into an array of attribute name/value pairs plus
 * an array with meta information about each attribute.
 * Attribute names are lowercased and stripped of all characters except a-z, A-Z,
 * 0-9, "_", ":" and "-". Attributes without a value get an empty string as value.
 *
 * @param	string		Tag, or the attribute part of a tag (see split_tag_attributes())
 * @param	boolean		If set, attribute values are passed through t3lib_div::htmlspecialchars_decode()
 * @return	array		array(attributes, meta). Meta holds per attribute "origTag" (the name before lowercasing) and, for attributes with a value, "dashType" (the quote character used, if any).
 */
function get_tag_attributes($tag,$deHSC=0)	{
	list($components,$metaC) = $this->split_tag_attributes($tag);
	$name = '';	// Name of the attribute currently waiting for a value
	$valuemode = FALSE;	// TRUE right after a "=" component
	$attributes = array();
	$attributesMeta = array();
	if (is_array($components))	{
		foreach ($components as $key => $val)	{
			if (strcmp($val,'='))	{
				if ($valuemode)	{
						// Only if $name is set (an attribute is waiting for a value) the value is assigned:
					if ($name)	{
						$attributes[$name] = $deHSC ? t3lib_div::htmlspecialchars_decode($val) : $val;
						$attributesMeta[$name]['dashType'] = $metaC[$key];
						$name = '';
					}
				} else {
					if ($namekey = preg_replace('/[^a-zA-Z0-9_:-]/','',$val))	{
						$name = strtolower($namekey);
						$attributesMeta[$name] = array();
						$attributesMeta[$name]['origTag'] = $namekey;
						$attributes[$name] = '';
					}
				}
				$valuemode = FALSE;
			} else {
				$valuemode = TRUE;
			}
		}
		return array($attributes,$attributesMeta);
	}
}
00454 
/**
 * Splits the attribute part of a tag into its components: attribute names, "="
 * signs and attribute values, in the order they appear.
 * A leading "<tagname" and a trailing ">" are removed first.
 *
 * @param	string		Tag, eg. '<a href="index.php" target=_blank>', or only the attribute part of it
 * @return	array		array(components, meta). For each component the meta array holds the quote character which wrapped it (" or '), or an empty string.
 */
function split_tag_attributes($tag)	{
		// Removes the tag name in the beginning of the string
	$tag_tmp = trim(preg_replace('/^<[^[:space:]]*/','',trim($tag)));
		// Removes any > in the end of the string
	$tag_tmp = trim(preg_replace('/>\z/','',$tag_tmp));

	$metaValue = array();
	$value = array();
	while (strcmp($tag_tmp,''))	{
		$firstChar = substr($tag_tmp,0,1);
		if (!strcmp($firstChar,'"') || !strcmp($firstChar,"'"))	{
				// Quoted value: everything up to the next quote of the same type
			$reg = explode($firstChar,$tag_tmp,3);
			$value[] = $reg[1];
			$metaValue[] = $firstChar;
			$tag_tmp = isset($reg[2]) ? trim($reg[2]) : '';	// No closing quote: the value ran to the end of the tag
		} elseif (!strcmp($firstChar,'='))	{
			$value[] = '=';
			$metaValue[] = '';
			$tag_tmp = trim((string)substr($tag_tmp,1));	// Removes = chars.
		} else {
				// There are no quotes around the component. We look for the next whitespace or "="
			$reg = preg_split('/[[:space:]=]/',$tag_tmp,2);
			if (!strlen($reg[0]))	{
					// A whitespace character which trim() does not strip (eg. form feed) is in front.
					// Drop it, otherwise $tag_tmp would never get shorter and the loop never end.
				$tag_tmp = trim((string)substr($tag_tmp,1));
				continue;
			}
			$value[] = trim($reg[0]);
			$metaValue[] = '';
				// The separator itself is put back in front of the rest, so that a "=" is seen in the next round:
			$tag_tmp = trim(substr($tag_tmp,strlen($reg[0]),1).(isset($reg[1]) ? $reg[1] : ''));
		}
	}
	return array($value,$metaValue);
}
00493 
/**
 * Counts the start and end tags in $content for a list of block tags (which must
 * have end tags) and solo tags (which should not have end tags) and reports the
 * mismatches. $content is lowercased before counting. A tag is counted when the
 * tag name is followed by a non-alphanumeric character. Tag names are inserted
 * into the regular expression unescaped.
 *
 * @param	string		HTML content to analyze
 * @param	string		Comma list of lowercase tag names which must have end tags
 * @param	string		Comma list of lowercase tag names which are not expected to have end tags
 * @return	array		Array with the keys "counts" (start-tag count per tag found), "errors", "warnings" (messages per tag), "blocks" (start count, end count, difference per block tag) and "solo" (start count, end count per solo tag).
 */
function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')	{
	$content = strtolower($content);
	$analyzedOutput = array();
	$analyzedOutput['counts'] = array();	// Counts appearances of start-tags
	$analyzedOutput['errors'] = array();	// Lists ERRORS
	$analyzedOutput['warnings'] = array();	// Lists warnings.
	$analyzedOutput['blocks'] = array();	// Lists stats for block-tags
	$analyzedOutput['solo'] = array();	// Lists stats for solo-tags
	$matches = array();

		// Block tags, must have endings...
	$blockTags = explode(',',$blockTags);
	foreach($blockTags as $tagName)	{
		$countBegin = intval(preg_match_all('/<'.$tagName.'[^[:alnum:]]/',$content,$matches));
		$countEnd = intval(preg_match_all('/<\/'.$tagName.'[^[:alnum:]]/',$content,$matches));
		$analyzedOutput['blocks'][$tagName] = array($countBegin,$countEnd,$countBegin-$countEnd);
		if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countBegin-$countEnd)	{
			if ($countBegin-$countEnd > 0)	{
				$analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
			} else {
				$analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
			}
		}
	}

		// Solo tags, must NOT have endings...
	$soloTags = explode(',',$soloTags);
	foreach($soloTags as $tagName)	{
		$countBegin = intval(preg_match_all('/<'.$tagName.'[^[:alnum:]]/',$content,$matches));
		$countEnd = intval(preg_match_all('/<\/'.$tagName.'[^[:alnum:]]/',$content,$matches));
		$analyzedOutput['solo'][$tagName] = array($countBegin,$countEnd);
		if ($countBegin)	$analyzedOutput['counts'][$tagName] = $countBegin;
		if ($countEnd)	{
			$analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
		}
	}

	return $analyzedOutput;
}
00546 
00547 
00548 
00549 
00550 
00551 
00552 
00553 
00554 
00555 
00556 
00557 
00558         /*********************************
00559          *
00560          * Clean HTML code
00561          *
00562          *********************************/
00563 
00600         function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())      {
00601                 $newContent = array();
00602                 $tokArr = explode('<',$content);
00603                 $newContent[] = $this->processContent(current($tokArr),$hSC,$addConfig);
00604                 next($tokArr);
00605 
00606                 $c = 1;
00607                 $tagRegister = array();
00608                 $tagStack = array();
00609                 while(list(,$tok)=each($tokArr))        {
00610                         $firstChar = substr($tok,0,1);
00611 #                       if (strcmp(trim($firstChar),''))        {               // It is a tag...
00612                         if (ereg('[[:alnum:]\/]',$firstChar))   {               // It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
00613                                 $tagEnd = strcspn($tok,'>');
00614                                 if (strlen($tok)!=$tagEnd)      {       // If there is and end-bracket...
00615                                         $endTag = $firstChar=='/' ? 1 : 0;
00616                                         $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
00617                                         $tagParts = split('[[:space:]]',$tagContent,2);
00618                                         $tagName = strtolower($tagParts[0]);
00619                                         if (isset($tags[$tagName]))     {
00620                                                 if (is_array($tags[$tagName]))  {       // If there is processing to do for the tag:
00621 
00622                                                         if (!$endTag)   {       // If NOT an endtag, do attribute processing (added dec. 2003)
00623                                                                         // Override attributes
00624                                                                 if (strcmp($tags[$tagName]['overrideAttribs'],''))      {
00625                                                                         $tagParts[1]=$tags[$tagName]['overrideAttribs'];
00626                                                                 }
00627 
00628                                                                         // Allowed tags
00629                                                                 if (strcmp($tags[$tagName]['allowedAttribs'],''))       {
00630                                                                         if (!strcmp($tags[$tagName]['allowedAttribs'],'0'))     {       // No attribs allowed
00631                                                                                 $tagParts[1]='';
00632                                                                         } elseif (trim($tagParts[1])) {
00633                                                                                 $tagAttrib = $this->get_tag_attributes($tagParts[1]);
00634                                                                                 $tagParts[1]='';
00635                                                                                 $newTagAttrib = array();
00636                                                                                 $tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
00637                                                                                 while(list(,$allowTag)=each($tList))    {
00638                                                                                         if (isset($tagAttrib[0][$allowTag]))    $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
00639                                                                                 }
00640                                                                                 $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
00641                                                                         }
00642                                                                 }
00643 
00644                                                                         // Fixed attrib values
00645                                                                 if (is_array($tags[$tagName]['fixAttrib']))     {
00646                                                                         $tagAttrib = $this->get_tag_attributes($tagParts[1]);
00647                                                                         $tagParts[1]='';
00648                                                                         reset($tags[$tagName]['fixAttrib']);
00649                                                                         while(list($attr,$params)=each($tags[$tagName]['fixAttrib']))   {
00650                                                                                 if (strlen($params['set']))     $tagAttrib[0][$attr] = $params['set'];
00651                                                                                 if (strlen($params['unset']))   unset($tagAttrib[0][$attr]);
00652                                                                                 if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))      $tagAttrib[0][$attr]=$params['default'];
00653                                                                                 if ($params['always'] || isset($tagAttrib[0][$attr]))   {
00654                                                                                         if ($params['trim'])    {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
00655                                                                                         if ($params['intval'])  {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
00656                                                                                         if ($params['lower'])   {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
00657                                                                                         if ($params['upper'])   {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
00658                                                                                         if ($params['range'])   {
00659                                                                                                 if (isset($params['range'][1])) {
00660                                                                                                         $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
00661                                                                                                 } else {
00662                                                                                                         $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
00663                                                                                                 }
00664                                                                                         }
00665                                                                                         if (is_array($params['list']))  {
00666                                                                                                 if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],$tagName)))     $tagAttrib[0][$attr]=$params['list'][0];
00667                                                                                         }
00668                                                                                         if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],'')))        {
00669                                                                                                 unset($tagAttrib[0][$attr]);
00670                                                                                         }
00671                                                                                         if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp'])))      {
00672                                                                                                 unset($tagAttrib[0][$attr]);
00673                                                                                         }
00674                                                                                         if ($params['prefixLocalAnchors'])      {
00675                                                                                                 if (substr($tagAttrib[0][$attr],0,1)=='#')      {
00676                                                                                                         $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
00677                                                                                                         $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
00678                                                                                                         if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL')))           {
00679                                                                                                                 $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
00680                                                                                                         }
00681                                                                                                 }
00682                                                                                         }
00683                                                                                         if ($params['prefixRelPathWith'])       {
00684                                                                                                 $urlParts = parse_url($tagAttrib[0][$attr]);
00685                                                                                                 if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/') {       // If it is NOT an absolute URL (by http: or starting "/")
00686                                                                                                         $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
00687                                                                                                 }
00688                                                                                         }
00689                                                                                         if ($params['userFunc'])        {
00690                                                                                                 $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
00691                                                                                         }
00692                                                                                 }
00693                                                                         }
00694                                                                         $tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
00695                                                                 }
00696                                                         } else {        // If endTag, remove any possible attributes:
00697                                                                 $tagParts[1]='';
00698                                                         }
00699 
00700                                                                 // Protecting the tag by converting < and > to &lt; and &gt; ??
00701                                                         if ($tags[$tagName]['protect']) {
00702                                                                 $lt = '&lt;';   $gt = '&gt;';
00703                                                         } else {
00704                                                                 $lt = '<';      $gt = '>';
00705                                                         }
00706                                                                 // Remapping tag name?
00707                                                         if ($tags[$tagName]['remap'])   $tagParts[0] = $tags[$tagName]['remap'];
00708 
00709                                                                 // rmTagIfNoAttrib
00710                                                         if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib'])       {
00711                                                                 $setTag=1;
00712 
00713                                                                 if ($tags[$tagName]['nesting']) {
00714                                                                         if (!is_array($tagRegister[$tagName]))  $tagRegister[$tagName]=array();
00715 
00716                                                                         if ($endTag)    {
00717 /*                                                                              if ($tags[$tagName]['nesting']=='global')       {
00718                                                                                         $lastEl = end($tagStack);
00719                                                                                         $correctTag = !strcmp($tagName,$lastEl);
00720                                                                                 } else $correctTag=1;
00721         */
00722                                                                                 $correctTag=1;
00723                                                                                 if ($tags[$tagName]['nesting']=='global')       {
00724                                                                                         $lastEl = end($tagStack);
00725                                                                                         if (strcmp($tagName,$lastEl))   {
00726                                                                                                 if (in_array($tagName,$tagStack))       {
00727                                                                                                         while(count($tagStack) && strcmp($tagName,$lastEl))     {
00728                                                                                                                 $elPos = end($tagRegister[$lastEl]);
00729                                                                                                                 unset($newContent[$elPos]);
00730 
00731                                                                                                                 array_pop($tagRegister[$lastEl]);
00732                                                                                                                 array_pop($tagStack);
00733                                                                                                                 $lastEl = end($tagStack);
00734                                                                                                         }
00735                                                                                                 } else {
00736                                                                                                         $correctTag=0;  // In this case the
00737                                                                                                 }
00738                                                                                         }
00739                                                                                 }
00740                                                                                 if (!count($tagRegister[$tagName]) || !$correctTag)     {
00741                                                                                         $setTag=0;
00742                                                                                 } else {
00743                                                                                         array_pop($tagRegister[$tagName]);
00744                                                                                         if ($tags[$tagName]['nesting']=='global')       {array_pop($tagStack);}
00745                                                                                 }
00746                                                                         } else {
00747                                                                                 array_push($tagRegister[$tagName],$c);
00748                                                                                 if ($tags[$tagName]['nesting']=='global')       {array_push($tagStack,$tagName);}
00749                                                                         }
00750                                                                 }
00751 
00752                                                                 if ($setTag)    {
00753                                                                                 // Setting the tag
00754                                                                         $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
00755                                                                 }
00756                                                         }
00757                                                 } else {
00758                                                         $newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
00759                                                 }
00760                                         } elseif ($keepAll) {   // This is if the tag was not defined in the array for processing:
00761                                                 if (!strcmp($keepAll,'protect'))        {
00762                                                         $lt = '&lt;';   $gt = '&gt;';
00763                                                 } else {
00764                                                         $lt = '<';      $gt = '>';
00765                                                 }
00766                                                 $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
00767                                         }
00768                                         $newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
00769                                 } else {
00770                                         $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // There were not end-bracket, so no tag...
00771                                 }
00772                         } else {
00773                                 $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);      // It was not a tag anyways
00774                         }
00775                 }
00776 
00777                         // Unsetting tags:
00778                 reset($tagRegister);
00779                 while(list($tag,$positions)=each($tagRegister)) {
00780                         reset($positions);
00781                         while(list(,$pKey)=each($positions))    {
00782                                 unset($newContent[$pKey]);
00783                         }
00784                 }
00785 
00786                 return implode('',$newContent);
00787         }
00788 
/**
 * Converts HTML special characters forth or back, depending on $dir.
 *
 * The quote style is pinned to ENT_COMPAT (double quotes only). Since PHP 8.1 the
 * default flags of htmlspecialchars() also encode single quotes, which the decoding
 * branch below ($dir = -1) never reverses; pinning the flag keeps encoding and
 * decoding symmetric on every PHP version.
 *
 * @param string $value Input value.
 * @param integer $dir Direction: 1 = encode with htmlspecialchars(); 2 = encode and
 *                     then pass the result through t3lib_div::deHSCentities();
 *                     -1 = decode &gt; &lt; &quot; &amp;. Any other value returns
 *                     the input untouched.
 * @return string Converted value.
 */
function bidir_htmlspecialchars($value,$dir) {
    if ($dir==1) {
        return htmlspecialchars($value, ENT_COMPAT);
    }
    if ($dir==2) {
        return t3lib_div::deHSCentities(htmlspecialchars($value, ENT_COMPAT));
    }
    if ($dir==-1) {
            // "&amp;" is decoded last on purpose: decoding it first would turn
            // "&amp;lt;" into "&lt;" and then wrongly into "<".
        return str_replace(
            array('&gt;', '&lt;', '&quot;', '&amp;'),
            array('>', '<', '"', '&'),
            $value
        );
    }
    return $value;
}
00809 
/**
 * Prefixes the relative resource paths found in HTML tags and in <style> sections.
 *
 * Handled tags and the attribute that is rewritten for each:
 *   td, body, table           -> background
 *   img, input, script, embed -> src
 *   link, a                   -> href
 *   form                      -> action
 * A tag is only re-compiled (and its name lower-cased) when the attribute holds a
 * non-empty value; every other tag is passed through untouched.
 *
 * Inside <style> blocks every url(...) reference gets the prefix and suffix,
 * regardless of whether it is relative or absolute.
 *
 * @param string $main_prefix Prefix used when no alternative applies.
 * @param string $content HTML content to process.
 * @param array $alternatives Alternative prefixes. For tags the key is the UPPERCASE
 *                            tag name; for the style section the key is 'style'.
 * @param string $suffix String appended after each rewritten path.
 * @return string Processed content.
 */
function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='') {
        // Attribute carrying the resource reference, per (lowercase) tag name.
    $pathAttributes = array(
        'td' => 'background',
        'body' => 'background',
        'table' => 'background',
        'img' => 'src',
        'input' => 'src',
        'script' => 'src',
        'embed' => 'src',
        'link' => 'href',
        'a' => 'href',
        'form' => 'action'
    );

    $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
    foreach ($parts as $k => $v) {
            // Odd keys are the tags, even keys the content in between.
        if ($k%2) {
            $params = $this->get_tag_attributes($v,1);
                // Detect tag-ending so that it is re-applied correctly.
            $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';
            $firstTagName = $this->getFirstTagName($v);
            $tagKey = strtolower($firstTagName);
            $altKey = strtoupper($firstTagName);
            $prefix = isset($alternatives[$altKey]) ? $alternatives[$altKey] : $main_prefix;

            if (isset($pathAttributes[$tagKey])) {
                $attr = $pathAttributes[$tagKey];
                if (!empty($params[0][$attr])) {
                    $params[0][$attr] = $this->prefixRelPath($prefix,$params[0][$attr],$suffix);
                    $attribs = $this->compileTagAttribs($params[0],$params[1]);
                    $parts[$k] = '<'.trim($tagKey.' '.$attribs).$tagEnd;
                }
            }
        }
    }
    $content = implode('',$parts);

        // Fix <style> section:
    $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
    if (strlen((string)$prefix)) {
            // "\" and "$" are meta characters in a preg replacement string; escape
            // them so prefix and suffix are always inserted literally.
        $safePrefix = addcslashes($prefix, '\\$');
        $safeSuffix = addcslashes($suffix, '\\$');
        $parts = $this->splitIntoBlock('style',$content);
        foreach ($parts as $k => $v) {
            if ($k%2) {
                $parts[$k] = preg_replace(
                    '/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',
                    '${1}'.$safePrefix.'${2}'.$safeSuffix.'${3}',
                    $v
                );
            }
        }
        $content = implode('',$parts);
    }

    return $content;
}
00892 
/**
 * Prefixes a path with $prefix (and appends $suffix) unless it is absolute.
 *
 * A value counts as absolute when parse_url() reports a scheme (http:, mailto:, ...)
 * or when it starts with "/". Absolute values are returned unchanged.
 *
 * @param string $prefix Prefix string.
 * @param string $srcVal Path or URL to test.
 * @param string $suffix Suffix string; only added when the prefix is added.
 * @return string Possibly prefixed value.
 */
function prefixRelPath($prefix,$srcVal,$suffix='') {
        // parse_url() returns FALSE for seriously malformed input and omits the
        // 'scheme' key for relative paths - both cases mean "no scheme".
    $urlParts = parse_url($srcVal);
    $hasScheme = is_array($urlParts) && !empty($urlParts['scheme']);

    if (!$hasScheme && substr($srcVal, 0, 1)!='/') {
        $srcVal = $prefix.$srcVal.$suffix;
    }
    return $srcVal;
}
00909 
/**
 * Cleans up <font> tags: keeps only the face/size/color attributes that are asked
 * for and removes font tags that end up without any attribute (their inner content
 * is kept). Nested font tags are processed recursively.
 *
 * Relies on splitIntoBlock(), which depends on end-tags being set properly.
 *
 * @param string $value Input content.
 * @param boolean $keepFace If set, a non-empty "face" attribute is kept.
 * @param boolean $keepSize If set, a non-empty "size" attribute is kept.
 * @param boolean $keepColor If set, a non-empty "color" attribute is kept.
 * @return string Processed content.
 */
function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0) {
        // Attribute name => "keep it" flag, in output order.
    $keepFlags = array(
        'face' => $keepFace,
        'size' => $keepSize,
        'color' => $keepColor
    );

    $fontSplit = $this->splitIntoBlock('font',$value);
    foreach ($fontSplit as $k => $v) {
            // Odd keys hold the <font>...</font> blocks.
        if ($k%2) {
            $attribArray = $this->get_tag_attributes_classic($this->getFirstTag($v));
            $newAttribs = array();
            foreach ($keepFlags as $attribName => $keep) {
                if ($keep && !empty($attribArray[$attribName])) {
                    $newAttribs[] = $attribName.'="'.$attribArray[$attribName].'"';
                }
            }

            $innerContent = $this->cleanFontTags($this->removeFirstAndLastTag($v),$keepFace,$keepSize,$keepColor);
            if (count($newAttribs)) {
                $fontSplit[$k] = '<font '.implode(' ',$newAttribs).'>'.$innerContent.'</font>';
            } else {
                $fontSplit[$k] = $innerContent;
            }
        }
    }
    return implode('',$fontSplit);
}
00941 
/**
 * Maps tag names onto other tag names, case-insensitively.
 *
 * For every $from => $to pair three forms are rewritten: "<from>", "<from attribs>"
 * (attributes are carried over) and "</from>" (anything after the name in an end tag
 * is dropped). An end tag only matches when the name is followed by whitespace or
 * ">", so mapping "b" leaves "</body>" and "</blockquote>" alone.
 *
 * $ltChar and $from are inserted into the search pattern as-is, i.e. they are
 * treated as regular expression source (PCRE, "/" delimited).
 *
 * @param string $value Content to process.
 * @param array $tags Map: source tag name => target tag name.
 * @param string $ltChar Alternative "less-than" character(s) to search for.
 * @param string $ltChar2 Alternative "less-than" character(s) to write instead.
 * @return string Processed content.
 */
function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<') {
        // "$" starts a back-reference in a preg replacement; keep it literal.
    $openWith = str_replace('$', '\\$', $ltChar2);

    foreach ($tags as $from => $to) {
        $target = str_replace('$', '\\$', $to);
        $value = preg_replace('/'.$ltChar.$from.'>/i', $openWith.$target.'>', $value);
        $value = preg_replace('/'.$ltChar.$from.'[[:space:]]([^>]*)>/i', $openWith.$target.' \\1>', $value);
        $value = preg_replace('/'.$ltChar.'\/'.$from.'([[:space:]][^>]*)?>/i', $openWith.'/'.$target.'>', $value);
    }
    return $value;
}
00960 
/**
 * Converts protected tags ("&lt;tag&gt;") back into real tags ("<tag>").
 *
 * A "&lt;" is only treated as a tag start when it is directly followed by a
 * non-whitespace character and a closing "&gt;" exists later in the same fragment.
 * Everything else is left protected.
 *
 * @param string $content Content with protected tags.
 * @param string $tagList Comma list of tag names to unprotect; the names found in
 *                        the content are lower-cased before comparison. An empty
 *                        list unprotects every tag.
 * @return string Content with the selected tags restored.
 */
function unprotectTags($content,$tagList='') {
    $tagsArray = t3lib_div::trimExplode(',',$tagList,1);
    $contentParts = explode('&lt;',$content);

    foreach ($contentParts as $k => $tok) {
            // The first fragment is the text before any "&lt;" - bypass it.
        if ($k === 0) {
            continue;
        }

            // Default: put the "&lt;" back that explode() removed.
        $restored = '&lt;'.$tok;

        $firstChar = substr($tok,0,1);
        if (strcmp(trim($firstChar),'')) {
            $subparts = explode('&gt;',$tok,2);
                // Two parts means a closing "&gt;" was found.
            if (count($subparts) == 2) {
                $endTag = $firstChar=='/' ? 1 : 0;
                $tagContent = (string)substr($subparts[0],$endTag);
                $tagParts = preg_split('/[[:space:]]/',$tagContent,2);
                $tagName = strtolower($tagParts[0]);
                if (!strcmp($tagList,'') || in_array($tagName,$tagsArray,true)) {
                    $restored = '<'.$subparts[0].'>'.$subparts[1];
                }
            }
        }
        $contentParts[$k] = $restored;
    }

    return implode('',$contentParts);
}
00991 
/**
 * Strips all tags from the content except those in $tagList.
 *
 * Works in three steps: the tags to keep are disguised by mapTags() (the "<" becomes
 * "_" and the tag name its md5 hash), strip_tags() then removes everything that
 * still looks like a tag, and finally the disguised tags are mapped back.
 *
 * @param string $value Content to strip tags from.
 * @param string $tagList Comma list of tag names to keep.
 * @return string Stripped content.
 */
function stripTagsExcept($value,$tagList) {
    $tags = t3lib_div::trimExplode(',',$tagList,1);
    $forthArr = array();
    $backArr = array();
    foreach ($tags as $theTag) {
        $hash = md5($theTag);
        $forthArr[$theTag] = $hash;
        $backArr[$hash] = $theTag;
    }

    $value = $this->mapTags($value,$forthArr,'<','_');
    $value = strip_tags($value);
    $value = $this->mapTags($value,$backArr,'_','<');
    return $value;
}
01014 
/**
 * Internal helper for case shifting of a string or of all values of an array.
 *
 * Strings are upper-cased unless $flag is set. Arrays are ALWAYS upper-cased,
 * whatever $flag says; when $cacheKey is given the converted array is stored in
 * $this->caseShift_cache and returned from there on later calls with the same key
 * (the array passed in is then ignored).
 *
 * @param mixed $str String, or array whose values are strings.
 * @param boolean $flag For strings only: if set, the string is returned unchanged.
 * @param string $cacheKey Cache key for array input; empty disables caching.
 * @return mixed Converted string or array.
 */
function caseShift($str,$flag,$cacheKey='') {
    if (!is_array($str)) {
        return $flag ? $str : strtoupper($str);
    }

    if ($cacheKey && isset($this->caseShift_cache[$cacheKey])) {
        return $this->caseShift_cache[$cacheKey];
    }

    foreach ($str as $k => $v) {
        $str[$k] = strtoupper($v);
    }
    if ($cacheKey) {
        $this->caseShift_cache[$cacheKey] = $str;
    }
    return $str;
}
01038 
/**
 * Compiles an attribute array (name => value) into an attribute string.
 *
 * An attribute is written with a value when the value is non-empty or when a
 * 'dashType' is registered for it in $meta; otherwise only its name is written.
 *
 * - XHTML mode ($xhtmlClean set): names are lower-cased, values are passed through
 *   htmlspecialchars() and always wrapped in double quotes.
 * - Normal mode: the name comes from $meta[name]['origTag'] when available, the
 *   quote character from $meta[name]['dashType']; without a dashType integers (as
 *   judged by t3lib_div::testInt()) are left unquoted and everything else gets
 *   double quotes. Values are written as-is.
 *
 * @param array $tagAttrib Attributes: name => value.
 * @param array $meta Per-attribute meta data ('origTag', 'dashType').
 * @param boolean $xhtmlClean If set, XHTML-style output is produced.
 * @return string Attributes joined by single spaces; empty string for no input.
 */
function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0) {
    if (!is_array($tagAttrib)) {
        return '';
    }

    $accu = array();
    foreach ($tagAttrib as $k => $v) {
        $writeValue = strcmp((string)$v,'') || isset($meta[$k]['dashType']);

        if ($xhtmlClean) {
            $attr = strtolower($k);
            if ($writeValue) {
                $attr .= '="'.htmlspecialchars($v).'"';
            }
        } else {
            $attr = !empty($meta[$k]['origTag']) ? $meta[$k]['origTag'] : $k;
            if ($writeValue) {
                if (!empty($meta[$k]['dashType'])) {
                    $dash = $meta[$k]['dashType'];
                } else {
                    $dash = t3lib_div::testInt($v) ? '' : '"';
                }
                $attr .= '='.$dash.$v.$dash;
            }
        }
        $accu[] = $attr;
    }
    return implode(' ',$accu);
}
01068 
/**
 * Wrapper around get_tag_attributes() that returns only the attribute array (the
 * first element of that method's result) and never anything but an array.
 *
 * @param string $tag Tag content, handed on to get_tag_attributes().
 * @param boolean $deHSC Handed on to get_tag_attributes() unchanged.
 * @return array The attribute array, or an empty array if none was returned.
 */
function get_tag_attributes_classic($tag,$deHSC=0) {
    $parsed = $this->get_tag_attributes($tag,$deHSC);
    if (is_array($parsed[0])) {
        return $parsed[0];
    }
    return array();
}
01081 
/**
 * Indents every line of the content. Carriage returns (chr 13) are removed, so the
 * result uses plain line feeds (chr 10) only.
 *
 * @param string $content Content to indent.
 * @param integer $number Number of indent strings to put before each line; values
 *                        below 1 add no indentation.
 * @param string $indentChar Indent string, a tab by default.
 * @return string Indented content.
 */
function indentLines($content, $number=1, $indentChar="\t") {
    $preTab = str_repeat($indentChar, max(0, intval($number)));
    $lines = explode(chr(10),str_replace(chr(13),'',$content));
    foreach ($lines as $k => $v) {
        $lines[$k] = $preTab.$v;
    }
    return implode(chr(10), $lines);
}
01098 
/**
 * Converts a TSconfig-style configuration array into the settings used for HTML
 * cleaning.
 *
 * Recognised $TSconfig keys: allowTags, tags. (per-tag settings incl. fixAttrib.),
 * localNesting, globalNesting, rmTagIfNoAttrib, noAttrib, removeTags,
 * xhtml_cleaning, keepNonMatchedTags, htmlSpecialChars.
 *
 * The returned array holds, in this order: the tag configuration, the
 * keepNonMatchedTags setting (string), the htmlSpecialChars setting (integer) and
 * the additional configuration array - the same order in which XHTML_clean() passes
 * its arguments to HTMLcleaner() after the content.
 *
 * @param array $TSconfig Configuration array.
 * @param array $keepTags Tag configuration to start from; merged over the tags
 *                        listed in 'allowTags'.
 * @return array Array with the four elements described above.
 */
function HTMLparserConfig($TSconfig,$keepTags=array()) {
        // Allow tags (base list, merged with incoming array)
    $allowTags = isset($TSconfig['allowTags']) ? strtolower($TSconfig['allowTags']) : '';
    $alTags = array_flip(t3lib_div::trimExplode(',',$allowTags,1));
    $keepTags = array_merge($alTags,$keepTags);

        // Set config properties.
    if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.'])) {
            // Pass 1: scalar entries switch a tag off ("0") or on ("1").
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (!is_array($tagC) && $key==strtolower($key)) {
                $setting = (string)$tagC;
                if ($setting === '0') {
                    unset($keepTags[$key]);
                }
                if ($setting === '1' && !isset($keepTags[$key])) {
                    $keepTags[$key] = 1;
                }
            }
        }

            // Pass 2: array entries ("tagname.") carry the detailed tag settings.
        foreach ($TSconfig['tags.'] as $key => $tagC) {
            if (is_array($tagC) && $key==strtolower($key)) {
                    // Strip the trailing dot of the TypoScript key.
                $key = substr($key,0,-1);
                if (!isset($keepTags[$key]) || !is_array($keepTags[$key])) {
                    $keepTags[$key] = array();
                }

                if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.'])) {
                    foreach ($tagC['fixAttrib.'] as $atName => $atConfig) {
                        if (is_array($atConfig)) {
                            $atName = substr($atName,0,-1);
                            if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName])) {
                                $keepTags[$key]['fixAttrib'][$atName] = array();
                            }
                                // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
                            $merged = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);
                                // 'range' and 'list' arrive as comma lists and are stored as arrays.
                            foreach (array('range', 'list') as $listProperty) {
                                if (isset($merged[$listProperty]) && !is_array($merged[$listProperty]) && strcmp((string)$merged[$listProperty],'')) {
                                    $merged[$listProperty] = t3lib_div::trimExplode(',',$merged[$listProperty]);
                                }
                            }
                            $keepTags[$key]['fixAttrib'][$atName] = $merged;
                        }
                    }
                }
                unset($tagC['fixAttrib.']);
                unset($tagC['fixAttrib']);
                    // Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
                $keepTags[$key] = array_merge($keepTags[$key],$tagC);
            }
        }
    }

        // Comma-list options that set one property on tags which are already kept.
        // Order matters: globalNesting comes after localNesting and overrides it.
    $listOptions = array(
        'localNesting' => array('nesting', 1),
        'globalNesting' => array('nesting', 'global'),
        'rmTagIfNoAttrib' => array('rmTagIfNoAttrib', 1),
        'noAttrib' => array('allowedAttribs', 0)
    );
    foreach ($listOptions as $option => $assignment) {
        if (empty($TSconfig[$option])) {
            continue;
        }
        $tagNames = t3lib_div::trimExplode(',',strtolower($TSconfig[$option]),1);
        foreach ($tagNames as $tn) {
            if (isset($keepTags[$tn])) {
                    // Tags that only came from 'allowTags' hold a scalar; turn it
                    // into an array before a property is set on it.
                if (!is_array($keepTags[$tn])) {
                    $keepTags[$tn] = array();
                }
                $keepTags[$tn][$assignment[0]] = $assignment[1];
            }
        }
    }

        // removeTags: configured so that the tag always ends up without attributes
        // and is therefore removed.
    if (!empty($TSconfig['removeTags'])) {
        $tagNames = t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1);
        foreach ($tagNames as $tn) {
            $keepTags[$tn] = array(
                'allowedAttribs' => 0,
                'rmTagIfNoAttrib' => 1
            );
        }
    }

        // Create additional configuration:
    $addConfig = array();
    if (!empty($TSconfig['xhtml_cleaning'])) {
        $addConfig['xhtml'] = 1;
    }

    return array(
        $keepTags,
        ''.(isset($TSconfig['keepNonMatchedTags']) ? $TSconfig['keepNonMatchedTags'] : ''),
        intval(isset($TSconfig['htmlSpecialChars']) ? $TSconfig['htmlSpecialChars'] : 0),
        $addConfig
    );
}
01205 
/**
 * Runs the content through HTMLcleaner() with XHTML processing switched on.
 *
 * No tag gets special treatment, all tags are kept and the text content is not
 * passed through htmlspecialchars (doing so would break <script> content).
 *
 * @param string $content Content to clean up.
 * @return string Cleaned content.
 */
function XHTML_clean($content) {
    $tagConfig = array();               // No tags treated specially
    $keepAllTags = 1;                   // Keep ALL tags.
    $hscMode = 0;                       // Content is left as it is
    $xhtmlConfig = array('xhtml' => 1);

    return $this->HTMLcleaner($content, $tagConfig, $keepAllTags, $hscMode, $xhtmlConfig);
}
01241 
/**
 * Processes a single tag for XHTML output. STILL VERY EXPERIMENTAL!
 *
 * The tag is returned unchanged when it is protected, when $conf is empty or when
 * $conf['xhtml'] is not set. Otherwise:
 * - end tags are lower-cased,
 * - tags starting with "<!" (comments, doctype) are left alone,
 * - start tags are rebuilt: lower-case tag name, every attribute in double quotes
 *   with its value passed through bidir_htmlspecialchars(..., 2), <img> gets an
 *   empty alt and <script> a type="text/javascript" when missing, and stand-alone
 *   tags (or tags that already ended with "/>") are closed with " />".
 *
 * Attribute names are written as get_tag_attributes() returns them.
 *
 * @param string $value The complete tag, including "<" and ">".
 * @param array $conf Configuration; only the key 'xhtml' is evaluated here.
 * @param boolean $endTag Set if $value is an end tag.
 * @param boolean $protected Set if the tag is protected ("&lt;" / "&gt;").
 * @return string Processed tag.
 */
function processTag($value,$conf,$endTag,$protected=0) {
        // Return immediately if protected or no parameters
    if ($protected || empty($conf) || empty($conf['xhtml'])) {
        return $value;
    }
        // Endtags are just set lowercase right away
    if ($endTag) {
        return strtolower($value);
    }
        // ... and comments are ignored.
    if (substr($value,0,2)=='<!') {
        return $value;
    }

        // Finding inner value without < > (or < />)
    $selfClosed = substr($value,-2)=='/>';
    $inValue = (string)substr($value,1,($selfClosed ? -2 : -1));
        // Separate attributes and tagname; a tag without attributes has no second part.
    $tagSplit = preg_split('/[[:space:]]/',$inValue,2);
    $tagName = strtolower($tagSplit[0]);
    $tagP = isset($tagSplit[1]) ? $tagSplit[1] : '';

        // Process attributes
    $tagAttrib = $this->get_tag_attributes($tagP);
    if (!isset($tagAttrib[0]) || !is_array($tagAttrib[0])) {
        $tagAttrib[0] = array();
    }
        // Set alt attribute for all images (not XHTML though...)
    if ($tagName === 'img' && !isset($tagAttrib[0]['alt'])) {
        $tagAttrib[0]['alt'] = '';
    }
        // Set type attribute for all script-tags
    if ($tagName === 'script' && !isset($tagAttrib[0]['type'])) {
        $tagAttrib[0]['type'] = 'text/javascript';
    }

        // Set attributes: always in quotes, with htmlspecialchars converted.
    $outA = array();
    foreach ($tagAttrib[0] as $attrib_name => $attrib_value) {
        $outA[] = $attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
    }
    $newTag = '<'.trim($tagName.' '.implode(' ',$outA));

        // All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
    if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed) {
        $newTag .= ' />';
    } else {
        $newTag .= '>';
    }
    return $newTag;
}
01289 
/**
 * Processes the content between tags: unless $dir is 0 the value is converted with
 * bidir_htmlspecialchars() in the given direction.
 *
 * @param string $value The content.
 * @param integer $dir Direction handed to bidir_htmlspecialchars(); 0 = leave as is.
 * @param array $conf Configuration; not used by this method.
 * @return string Processed content.
 */
function processContent($value,$dir,$conf) {
    if ($dir==0) {
        return $value;
    }
    return $this->bidir_htmlspecialchars($value,$dir);
}
01303 }
01304 
01305 
01306 
// XCLASS hook: when running inside TYPO3 and an extension file is registered for
// this script in $TYPO3_CONF_VARS, include it (once).
if (defined('TYPO3_MODE')) {
    if ($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']) {
        include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
    }
}
01310 ?>


Généré par TYPO3 Ameos avec  doxygen 1.4.6