Documentation TYPO3 par Ameos

class.doublemetaphone.php

00001 <?php
00002 // VERSION DoubleMetaphone Class 1.01
00003 //
00004 // DESCRIPTION
00005 //
00006 //   This class implements a "sounds like" algorithm developed
00007 //   by Lawrence Philips which he published in the June, 2000 issue
00008 //   of C/C++ Users Journal.  Double Metaphone is an improved
00009 //   version of Philips' original Metaphone algorithm.
00010 //
00011 // COPYRIGHT
00012 //
00013 //   Copyright 2001, Stephen Woodbridge <woodbri@swoodbridge.com>
00014 //   All rights reserved.
00015 //
00016 //   http://swoodbridge.com/DoubleMetaPhone/
00017 //
00018 //   This PHP translation is based heavily on the C implementation
00019 //   by Maurice Aubrey <maurice@hevanet.com>, which in turn
00020 //   is based heavily on the C++ implementation by
00021 //   Lawrence Philips and incorporates several bug fixes courtesy
00022 //   of Kevin Atkinson <kevina@users.sourceforge.net>.
00023 //
00024 //   This module is free software; you may redistribute it and/or
00025 //   modify it under the same terms as Perl itself.
00026 //
00027 // CONTRIBUTIONS
00028 //
00029 //   17-May-2002 Geoff Caplan  http://www.advantae.com
00030 //     Bug fix: added code to return class object which I forgot to do
00031 //     Created a functional callable version instead of the class version
00032 //     which is faster if you are calling this a lot.
00033 //
00034 // ------------------------------------------------------------------
00035 
00036 
00037 
00038 // TYPO3: The class had to be renamed from "DoubleMetaPhone" to "user_DoubleMetaPhone" because TYPO3 requires user classes to carry the "user_" prefix.
00039 // TYPO3: To use this metaphone method instead of the default one in class.indexer.php, add the line below to your localconf.php file:
00040 // TYPO3:                       $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
00041 // TYPO3: Of course, you can write your own metaphone hook methods by taking this class and configuration as an example.
00042 
00043 class user_DoubleMetaPhone
00044 {
00045 //  properties
00046 
00047    var $original  = "";
00048    var $primary   = "";
00049    var $secondary = "";
00050    var $length    =  0;
00051    var $last      =  0;
00052    var $current   =  0;
00053 
00054 //  methods
00055 
00056                 // TYPO3 specific API to this class. BEGIN
00057         function metaphone($string, $sys_language_uid = 0) {
00058                 // TYPO3 hook entry point; $sys_language_uid is accepted but not used by this implementation.
00059                 $codes = $this->DoubleMetaPhone($string);
00060                 return $codes['primary'];       // only the 'primary' entry of the result is handed back
00061         }
00062                 // TYPO3 specific API to this class. END
00063 
00064 
00065   // Public method
00066 
00067   function DoubleMetaPhone($string) {
00068 
00069    $this->primary   = "";
00070    $this->secondary = "";
00071    $this->current   =  0;
00072 
00073     $this->current  = 0;
00074     $this->length   = strlen($string);
00075     $this->last     = $this->length - 1;
00076     $this->original = $string . "     ";
00077 
00078     $this->original = strtoupper($this->original);
00079 
00080     // skip this at beginning of word
00081     if ($this->StringAt($this->original, 0, 2,
00082                         array('GN', 'KN', 'PN', 'WR', 'PS')))
00083       $this->current++;
00084 
00085     // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
00086     if (substr($this->original, 0, 1) == 'X') {
00087       $this->primary   .= "S";   // 'Z' maps to 'S'
00088       $this->secondary .= "S";
00089       $this->current++;
00090     }
00091 
00092     // main loop
00093 
00094     while (strlen($this->primary) < 4 || strlen($this->secondary < 4)) {
00095       if ($this->current >= $this->length)
00096         break;
00097 
00098       switch (substr($this->original, $this->current, 1)) {
00099         case 'A':
00100         case 'E':
00101         case 'I':
00102         case 'O':
00103         case 'U':
00104         case 'Y':
00105           if ($this->current == 0) {
00106             // all init vowels now map to 'A'
00107             $this->primary   .= 'A';
00108             $this->secondary .= 'A';
00109           }
00110           $this->current += 1;
00111           break;
00112 
00113         case 'B':
00114           // '-mb', e.g. "dumb", already skipped over ...
00115           $this->primary   .= 'P';
00116           $this->secondary .= 'P';
00117 
00118           if (substr($this->original, $this->current + 1, 1) == 'B')
00119             $this->current += 2;
00120           else
00121             $this->current += 1;
00122           break;
00123 
00124         case 'Ç':
00125           $this->primary   .= 'S';
00126           $this->secondary .= 'S';
00127           $this->current += 1;
00128           break;
00129 
00130         case 'C':
00131           // various germanic
00132           if (($this->current > 1)
00133               && !$this->IsVowel($this->original, $this->current - 2)
00134               && $this->StringAt($this->original, $this->current - 1, 3,
00135                         array("ACH"))
00136               && ((substr($this->original, $this->current + 2, 1) != 'I')
00137                   && ((substr($this->original, $this->current + 2, 1) != 'E')
00138                       || $this->StringAt($this->original, $this->current - 2, 6,
00139                                 array("BACHER", "MACHER"))))) {
00140 
00141             $this->primary   .= 'K';
00142             $this->secondary .= 'K';
00143             $this->current += 2;
00144             break;
00145           }
00146 
00147           // special case 'caesar'
00148           if (($this->current == 0)
00149               && $this->StringAt($this->original, $this->current, 6,
00150                          array("CAESAR"))) {
00151             $this->primary   .= 'S';
00152             $this->secondary .= 'S';
00153             $this->current += 2;
00154             break;
00155           }
00156 
00157           // italian 'chianti'
00158           if ($this->StringAt($this->original, $this->current, 4,
00159                          array("CHIA"))) {
00160             $this->primary   .= 'K';
00161             $this->secondary .= 'K';
00162             $this->current += 2;
00163             break;
00164           }
00165 
00166           if ($this->StringAt($this->original, $this->current, 2,
00167                          array("CH"))) {
00168 
00169             // find 'michael'
00170             if (($this->current > 0)
00171                 && $this->StringAt($this->original, $this->current, 4,
00172                          array("CHAE"))) {
00173               $this->primary   .= 'K';
00174               $this->secondary .= 'X';
00175               $this->current += 2;
00176               break;
00177             }
00178 
00179             // greek roots e.g. 'chemistry', 'chorus'
00180             if (($this->current == 0)
00181                 && ($this->StringAt($this->original, $this->current + 1, 5,
00182                          array("HARAC", "HARIS"))
00183                     || $this->StringAt($this->original, $this->current + 1, 3,
00184                               array("HOR", "HYM", "HIA", "HEM")))
00185                 && !$this->StringAt($this->original, 0, 5, array("CHORE"))) {
00186               $this->primary   .= 'K';
00187               $this->secondary .= 'K';
00188               $this->current += 2;
00189               break;
00190             }
00191 
00192             // germanic, greek, or otherwise 'ch' for 'kh' sound
00193             if (($this->StringAt($this->original, 0, 4, array("VAN ", "VON "))
00194                  || $this->StringAt($this->original, 0, 3, array("SCH")))
00195                 // 'architect', 'orchestra', 'orchid', but not 'arch'
00196                 || $this->StringAt($this->original, $this->current - 2, 6,
00197                          array("ORCHES", "ARCHIT", "ORCHID"))
00198                 || $this->StringAt($this->original, $this->current + 2, 1,
00199                          array("T", "S"))
00200                 || (($this->StringAt($this->original, $this->current - 1, 1,
00201                          array("A","O","U","E"))
00202                      || ($this->current == 0))
00203                     // e.g. 'wachtler', 'weschsler', but not 'tichner'
00204                     && $this->StringAt($this->original, $this->current + 2, 1,
00205                          array("L","R","N","M","B","H","F","V","W"," ")))) {
00206               $this->primary   .= 'K';
00207               $this->secondary .= 'K';
00208             } else {
00209               if ($this->current > 0) {
00210                 if ($this->StringAt($this->original, 0, 2, array("MC"))) {
00211                   // e.g. 'McHugh'
00212                   $this->primary   .= 'K';
00213                   $this->secondary .= 'K';
00214                 } else {
00215                   $this->primary   .= 'X';
00216                   $this->secondary .= 'K';
00217                 }
00218               } else {
00219                 $this->primary   .= 'X';
00220                 $this->secondary .= 'X';
00221               }
00222             }
00223             $this->current += 2;
00224             break;
00225           }
00226 
00227           // e.g. 'czerny'
00228           if ($this->StringAt($this->original, $this->current, 2, array("CZ"))
00229               && !$this->StringAt($this->original, $this->current -2, 4,
00230                          array("WICZ"))) {
00231             $this->primary   .= 'S';
00232             $this->secondary .= 'X';
00233             $this->current += 2;
00234             break;
00235           }
00236 
00237           // e.g. 'focaccia'
00238           if ($this->StringAt($this->original, $this->current + 1, 3,
00239                      array("CIA"))) {
00240             $this->primary   .= 'X';
00241             $this->secondary .= 'X';
00242             $this->current += 3;
00243             break;
00244           }
00245 
00246           // double 'C', but not 'McClellan'
00247           if ($this->StringAt($this->original, $this->current, 2, array("CC"))
00248               && !(($this->current == 1)
00249                    && (substr($this->original, 0, 1) == 'M'))) {
00250             // 'bellocchio' but not 'bacchus'
00251             if ($this->StringAt($this->original, $this->current + 2, 1,
00252                        array("I","E","H"))
00253                 && !$this->StringAt($this->original, $this->current + 2, 2,
00254                           array("HU"))) {
00255               // 'accident', 'accede', 'succeed'
00256               if ((($this->current == 1)
00257                    && (substr($this->original, $this->current - 1, 1) == 'A'))
00258                   || $this->StringAt($this->original, $this->current - 1, 5,
00259                             array("UCCEE", "UCCES"))) {
00260                 $this->primary   .= "KS";
00261                 $this->secondary .= "KS";
00262                 // 'bacci', 'bertucci', other italian
00263               } else {
00264                 $this->primary   .= "X";
00265                 $this->secondary .= "X";
00266               }
00267               $this->current += 3;
00268               break;
00269             } else {
00270               // Pierce's rule
00271               $this->primary   .= "K";
00272               $this->secondary .= "K";
00273               $this->current += 2;
00274               break;
00275             }
00276           }
00277 
00278           if ($this->StringAt($this->original, $this->current, 2,
00279                      array("CK","CG","CQ"))) {
00280             $this->primary   .= "K";
00281             $this->secondary .= "K";
00282             $this->current += 2;
00283             break;
00284           }
00285 
00286           if ($this->StringAt($this->original, $this->current, 2,
00287                      array("CI","CE","CY"))) {
00288             // italian vs. english
00289             if ($this->StringAt($this->original, $this->current, 3,
00290                        array("CIO","CIE","CIA"))) {
00291               $this->primary   .= "S";
00292               $this->secondary .= "X";
00293             } else {
00294               $this->primary   .= "S";
00295               $this->secondary .= "S";
00296             }
00297             $this->current += 2;
00298             break;
00299           }
00300 
00301           // else
00302           $this->primary   .= "K";
00303           $this->secondary .= "K";
00304 
00305           // name sent in 'mac caffrey', 'mac gregor'
00306           if ($this->StringAt($this->original, $this->current + 1, 2,
00307                      array(" C"," Q"," G"))) {
00308             $this->current += 3;
00309           } else {
00310             if ($this->StringAt($this->original, $this->current + 1, 1,
00311                        array("C","K","Q"))
00312                 && !$this->StringAt($this->original, $this->current + 1, 2,
00313                            array("CE","CI"))) {
00314               $this->current += 2;
00315             } else {
00316               $this->current += 1;
00317             }
00318           }
00319           break;
00320 
00321         case 'D':
00322           if ($this->StringAt($this->original, $this->current, 2,
00323                      array("DG"))) {
00324             if ($this->StringAt($this->original, $this->current + 2, 1,
00325                        array("I","E","Y"))) {
00326               // e.g. 'edge'
00327               $this->primary   .= "J";
00328               $this->secondary .= "J";
00329               $this->current += 3;
00330 
00331               break;
00332             } else {
00333               // e.g. 'edgar'
00334               $this->primary   .= "TK";
00335               $this->secondary .= "TK";
00336               $this->current += 2;
00337               break;
00338             }
00339           }
00340 
00341           if ($this->StringAt($this->original, $this->current, 2,
00342                      array("DT","DD"))) {
00343             $this->primary   .= "T";
00344             $this->secondary .= "T";
00345             $this->current += 2;
00346             break;
00347           }
00348 
00349           // else
00350           $this->primary   .= "T";
00351           $this->secondary .= "T";
00352           $this->current += 1;
00353           break;
00354 
00355         case 'F':
00356           if (substr($this->original, $this->current + 1, 1) == 'F')
00357             $this->current += 2;
00358           else
00359             $this->current += 1;
00360           $this->primary   .= "F";
00361           $this->secondary .= "F";
00362           break;
00363 
00364         case 'G':
00365           if (substr($this->original, $this->current + 1, 1) == 'H') {
00366             if (($this->current > 0)
00367                 && !$this->IsVowel($this->original, $this->current - 1)) {
00368               $this->primary   .= "K";
00369               $this->secondary .= "K";
00370               $this->current += 2;
00371               break;
00372             }
00373 
00374             if ($this->current < 3) {
00375               // 'ghislane', 'ghiradelli'
00376               if ($this->current == 0) {
00377                 if (substr($this->original, $this->current + 2, 1) == 'I') {
00378                   $this->primary   .= "J";
00379                   $this->secondary .= "J";
00380                 } else {
00381                   $this->primary   .= "K";
00382                   $this->secondary .= "K";
00383                 }
00384                 $this->current += 2;
00385                 break;
00386               }
00387             }
00388 
00389             // Parker's rule (with some further refinements) - e.g. 'hugh'
00390             if ((($this->current > 1)
00391                  && $this->StringAt($this->original, $this->current - 2, 1,
00392                            array("B","H","D")))
00393                 // e.g. 'bough'
00394                 || (($this->current > 2)
00395                     &&  $this->StringAt($this->original, $this->current - 3, 1,
00396                                array("B","H","D")))
00397                 // e.g. 'broughton'
00398                 || (($this->current > 3)
00399                     && $this->StringAt($this->original, $this->current - 4, 1,
00400                                array("B","H")))) {
00401               $this->current += 2;
00402               break;
00403             } else {
00404               // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
00405               if (($this->current > 2)
00406                   && (substr($this->original, $this->current - 1, 1) == 'U')
00407                   && $this->StringAt($this->original, $this->current - 3, 1,
00408                             array("C","G","L","R","T"))) {
00409                 $this->primary   .= "F";
00410                 $this->secondary .= "F";
00411               } elseif (($this->current > 0)
00412                         && substr($this->original, $this->current - 1, 1) != 'I') {
00413                 $this->primary   .= "K";
00414                 $this->secondary .= "K";
00415               }
00416               $this->current += 2;
00417               break;
00418             }
00419           }
00420 
00421           if (substr($this->original, $this->current + 1, 1) == 'N') {
00422             if (($this->current == 1) && $this->IsVowel($this->original, 0)
00423                 && !$this->SlavoGermanic($this->original)) {
00424               $this->primary   .= "KN";
00425               $this->secondary .= "N";
00426             } else {
00427               // not e.g. 'cagney'
00428               if (!$this->StringAt($this->original, $this->current + 2, 2,
00429                           array("EY"))
00430                   && (substr($this->original, $this->current + 1) != "Y")
00431                   && !$this->SlavoGermanic($this->original)) {
00432                  $this->primary   .= "N";
00433                  $this->secondary .= "KN";
00434               } else {
00435                  $this->primary   .= "KN";
00436                  $this->secondary .= "KN";
00437               }
00438             }
00439             $this->current += 2;
00440             break;
00441           }
00442 
00443           // 'tagliaro'
00444           if ($this->StringAt($this->original, $this->current + 1, 2,
00445                      array("LI"))
00446               && !$this->SlavoGermanic($this->original)) {
00447             $this->primary   .= "KL";
00448             $this->secondary .= "L";
00449             $this->current += 2;
00450             break;
00451           }
00452 
00453           // -ges-, -gep-, -gel- at beginning
00454           if (($this->current == 0)
00455               && ((substr($this->original, $this->current + 1, 1) == 'Y')
00456                   || $this->StringAt($this->original, $this->current + 1, 2,
00457                             array("ES","EP","EB","EL","EY","IB","IL","IN","IE",
00458                                   "EI","ER")))) {
00459             $this->primary   .= "K";
00460             $this->secondary .= "J";
00461             $this->current += 2;
00462             break;
00463           }
00464 
00465           // -ger-, -gy-
00466           if (($this->StringAt($this->original, $this->current + 1, 2,
00467                       array("ER"))
00468                || (substr($this->original, $this->current + 1, 1) == 'Y'))
00469               && !$this->StringAt($this->original, 0, 6,
00470                          array("DANGER","RANGER","MANGER"))
00471               && !$this->StringAt($this->original, $this->current -1, 1,
00472                          array("E", "I"))
00473               && !$this->StringAt($this->original, $this->current -1, 3,
00474                          array("RGY","OGY"))) {
00475             $this->primary   .= "K";
00476             $this->secondary .= "J";
00477             $this->current += 2;
00478             break;
00479           }
00480 
00481           // italian e.g. 'biaggi'
00482           if ($this->StringAt($this->original, $this->current + 1, 1,
00483                      array("E","I","Y"))
00484               || $this->StringAt($this->original, $this->current -1, 4,
00485                         array("AGGI","OGGI"))) {
00486             // obvious germanic
00487             if (($this->StringAt($this->original, 0, 4, array("VAN ", "VON "))
00488                  || $this->StringAt($this->original, 0, 3, array("SCH")))
00489                 || $this->StringAt($this->original, $this->current + 1, 2,
00490                           array("ET"))) {
00491               $this->primary   .= "K";
00492               $this->secondary .= "K";
00493             } else {
00494               // always soft if french ending
00495               if ($this->StringAt($this->original, $this->current + 1, 4,
00496                          array("IER "))) {
00497                 $this->primary   .= "J";
00498                 $this->secondary .= "J";
00499               } else {
00500                 $this->primary   .= "J";
00501                 $this->secondary .= "K";
00502               }
00503             }
00504             $this->current += 2;
00505             break;
00506           }
00507 
00508           if (substr($this->original, $this->current +1, 1) == 'G')
00509             $this->current += 2;
00510           else
00511             $this->current += 1;
00512 
00513           $this->primary   .= 'K';
00514           $this->secondary .= 'K';
00515           break;
00516 
00517         case 'H':
00518           // only keep if first & before vowel or btw. 2 vowels
00519           if ((($this->current == 0) ||
00520                $this->IsVowel($this->original, $this->current - 1))
00521               && $this->IsVowel($this->original, $this->current + 1)) {
00522             $this->primary   .= 'H';
00523             $this->secondary .= 'H';
00524             $this->current += 2;
00525           } else
00526             $this->current += 1;
00527           break;
00528 
00529         case 'J':
00530           // obvious spanish, 'jose', 'san jacinto'
00531           if ($this->StringAt($this->original, $this->current, 4,
00532                      array("JOSE"))
00533               || $this->StringAt($this->original, 0, 4, array("SAN "))) {
00534             if ((($this->current == 0)
00535                  && (substr($this->original, $this->current + 4, 1) == ' '))
00536                 || $this->StringAt($this->original, 0, 4, array("SAN "))) {
00537               $this->primary   .= 'H';
00538               $this->secondary .= 'H';
00539             } else {
00540               $this->primary   .= "J";
00541               $this->secondary .= 'H';
00542             }
00543             $this->current += 1;
00544             break;
00545           }
00546 
00547           if (($this->current == 0)
00548               && !$this->StringAt($this->original, $this->current, 4,
00549                      array("JOSE"))) {
00550             $this->primary   .= 'J';  // Yankelovich/Jankelowicz
00551             $this->secondary .= 'A';
00552           } else {
00553             // spanish pron. of .e.g. 'bajador'
00554             if ($this->IsVowel($this->original, $this->current - 1)
00555                 && !$this->SlavoGermanic($this->original)
00556                 && ((substr($this->original, $this->current + 1, 1) == 'A')
00557                     || (substr($this->original, $this->current + 1, 1) == 'O'))) {
00558               $this->primary   .= "J";
00559               $this->secondary .= "H";
00560             } else {
00561               if ($this->current == $this->last) {
00562                 $this->primary   .= "J";
00563                 $this->secondary .= "";
00564               } else {
00565                 if (!$this->StringAt($this->original, $this->current + 1, 1,
00566                             array("L","T","K","S","N","M","B","Z"))
00567                     && !$this->StringAt($this->original, $this->current - 1, 1,
00568                                array("S","K","L"))) {
00569                   $this->primary   .= "J";
00570                   $this->secondary .= "J";
00571                 }
00572               }
00573             }
00574           }
00575 
00576           if (substr($this->original, $this->current + 1, 1) == 'J') // it could happen
00577             $this->current += 2;
00578           else
00579             $this->current += 1;
00580           break;
00581 
00582         case 'K':
00583           if (substr($this->original, $this->current + 1, 1) == 'K')
00584             $this->current += 2;
00585           else
00586             $this->current += 1;
00587           $this->primary   .= "K";
00588           $this->secondary .= "K";
00589           break;
00590 
00591         case 'L':
00592           if (substr($this->original, $this->current + 1, 1) == 'L') {
00593             // spanish e.g. 'cabrillo', 'gallegos'
00594             if ((($this->current == ($this->length - 3))
00595                  && $this->StringAt($this->original, $this->current - 1, 4,
00596                            array("ILLO","ILLA","ALLE")))
00597                 || (($this->StringAt($this->original, $this->last-1, 2,
00598                             array("AS","OS"))
00599                   || $this->StringAt($this->original, $this->last, 1,
00600                             array("A","O")))
00601                  && $this->StringAt($this->original, $this->current - 1, 4,
00602                            array("ALLE")))) {
00603               $this->primary   .= "L";
00604               $this->secondary .= "";
00605               $this->current += 2;
00606               break;
00607             }
00608             $this->current += 2;
00609           } else
00610             $this->current += 1;
00611           $this->primary   .= "L";
00612           $this->secondary .= "L";
00613           break;
00614 
00615         case 'M':
00616           if (($this->StringAt($this->original, $this->current - 1, 3,
00617                      array("UMB"))
00618                && ((($this->current + 1) == $this->last)
00619                    || $this->StringAt($this->original, $this->current + 2, 2,
00620                             array("ER"))))
00621               // 'dumb', 'thumb'
00622               || (substr($this->original, $this->current + 1, 1) == 'M')) {
00623               $this->current += 2;
00624           } else {
00625               $this->current += 1;
00626           }
00627           $this->primary   .= "M";
00628           $this->secondary .= "M";
00629           break;
00630 
00631         case 'N':
00632           if (substr($this->original, $this->current + 1, 1) == 'N')
00633             $this->current += 2;
00634           else
00635             $this->current += 1;
00636           $this->primary   .= "N";
00637           $this->secondary .= "N";
00638           break;
00639 
00640         case 'Ñ':
00641           $this->current += 1;
00642           $this->primary   .= "N";
00643           $this->secondary .= "N";
00644           break;
00645 
00646         case 'P':
00647           if (substr($this->original, $this->current + 1, 1) == 'H') {
00648             $this->current += 2;
00649             $this->primary   .= "F";
00650             $this->secondary .= "F";
00651             break;
00652           }
00653 
00654           // also account for "campbell" and "raspberry"
00655           if ($this->StringAt($this->original, $this->current + 1, 1,
00656                      array("P","B")))
00657             $this->current += 2;
00658           else
00659             $this->current += 1;
00660           $this->primary   .= "P";
00661           $this->secondary .= "P";
00662           break;
00663 
00664         case 'Q':
00665           if (substr($this->original, $this->current + 1, 1) == 'Q')
00666             $this->current += 2;
00667           else
00668             $this->current += 1;
00669           $this->primary   .= "K";
00670           $this->secondary .= "K";
00671           break;
00672 
00673         case 'R':
00674           // french e.g. 'rogier', but exclude 'hochmeier'
00675           if (($this->current == $this->last)
00676               && !$this->SlavoGermanic($this->original)
00677               && $this->StringAt($this->original, $this->current - 2, 2,
00678                         array("IE"))
00679               && !$this->StringAt($this->original, $this->current - 4, 2,
00680                          array("ME","MA"))) {
00681             $this->primary   .= "";
00682             $this->secondary .= "R";
00683           } else {
00684             $this->primary   .= "R";
00685             $this->secondary .= "R";
00686           }
00687           if (substr($this->original, $this->current + 1, 1) == 'R')
00688             $this->current += 2;
00689           else
00690             $this->current += 1;
00691           break;
00692 
00693         case 'S':
00694           // special cases 'island', 'isle', 'carlisle', 'carlysle'
00695           if ($this->StringAt($this->original, $this->current - 1, 3,
00696                      array("ISL","YSL"))) {
00697             $this->current += 1;
00698             break;
00699           }
00700 
00701           // special case 'sugar-'
00702           if (($this->current == 0)
00703               && $this->StringAt($this->original, $this->current, 5,
00704                         array("SUGAR"))) {
00705             $this->primary   .= "X";
00706             $this->secondary .= "S";
00707             $this->current += 1;
00708             break;
00709           }
00710 
00711           if ($this->StringAt($this->original, $this->current, 2,
00712                      array("SH"))) {
00713             // germanic
00714             if ($this->StringAt($this->original, $this->current + 1, 4,
00715                        array("HEIM","HOEK","HOLM","HOLZ"))) {
00716               $this->primary   .= "S";
00717               $this->secondary .= "S";
00718             } else {
00719               $this->primary   .= "X";
00720               $this->secondary .= "X";
00721             }
00722             $this->current += 2;
00723             break;
00724           }
00725 
00726           // italian & armenian
00727           if ($this->StringAt($this->original, $this->current, 3,
00728                      array("SIO","SIA"))
00729               || $this->StringAt($this->original, $this->current, 4,
00730                         array("SIAN"))) {
00731             if (!$this->SlavoGermanic($this->original)) {
00732               $this->primary   .= "S";
00733               $this->secondary .= "X";
00734             } else {
00735               $this->primary   .= "S";
00736               $this->secondary .= "S";
00737             }
00738             $this->current += 3;
00739             break;
00740           }
00741 
00742           // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
00743           // also, -sz- in slavic language altho in hungarian it is pronounced 's'
00744           if ((($this->current == 0)
00745                && $this->StringAt($this->original, $this->current + 1, 1,
00746                          array("M","N","L","W")))
00747               || $this->StringAt($this->original, $this->current + 1, 1,
00748                         array("Z"))) {
00749             $this->primary   .= "S";
00750             $this->secondary .= "X";
00751             if ($this->StringAt($this->original, $this->current + 1, 1,
00752                         array("Z")))
00753               $this->current += 2;
00754             else
00755               $this->current += 1;
00756             break;
00757           }
00758 
00759           if ($this->StringAt($this->original, $this->current, 2,
00760                      array("SC"))) {
00761             // Schlesinger's rule
00762             if (substr($this->original, $this->current + 2, 1) == 'H')
00763               // dutch origin, e.g. 'school', 'schooner'
00764               if ($this->StringAt($this->original, $this->current + 3, 2,
00765                          array("OO","ER","EN","UY","ED","EM"))) {
00766                 // 'schermerhorn', 'schenker'
00767                 if ($this->StringAt($this->original, $this->current + 3, 2,
00768                            array("ER","EN"))) {
00769                   $this->primary   .= "X";
00770                   $this->secondary .= "SK";
00771                 } else {
00772                   $this->primary   .= "SK";
00773                   $this->secondary .= "SK";
00774                 }
00775                 $this->current += 3;
00776                 break;
00777               } else {
00778                 if (($this->current == 0)
00779                     && !$this->IsVowel($this->original, 3)
00780                     && (substr($this->original, $this->current + 3, 1) != 'W')) {
00781                   $this->primary   .= "X";
00782                   $this->secondary .= "S";
00783                 } else {
00784                   $this->primary   .= "X";
00785                   $this->secondary .= "X";
00786                 }
00787                 $this->current += 3;
00788                 break;
00789               }
00790 
00791               if ($this->StringAt($this->original, $this->current + 2, 1,
00792                          array("I","E","Y"))) {
00793                 $this->primary   .= "S";
00794                 $this->secondary .= "S";
00795                 $this->current += 3;
00796                 break;
00797               }
00798 
00799             // else
00800             $this->primary   .= "SK";
00801             $this->secondary .= "SK";
00802             $this->current += 3;
00803             break;
00804           }
00805 
00806           // french e.g. 'resnais', 'artois'
00807           if (($this->current == $this->last)
00808               && $this->StringAt($this->original, $this->current - 2, 2,
00809                         array("AI","OI"))) {
00810             $this->primary   .= "";
00811             $this->secondary .= "S";
00812           } else {
00813             $this->primary   .= "S";
00814             $this->secondary .= "S";
00815           }
00816 
00817           if ($this->StringAt($this->original, $this->current + 1, 1,
00818                      array("S","Z")))
00819             $this->current += 2;
00820           else
00821             $this->current += 1;
00822           break;
00823 
00824         case 'T':
00825           if ($this->StringAt($this->original, $this->current, 4,
00826                      array("TION"))) {
00827             $this->primary   .= "X";
00828             $this->secondary .= "X";
00829             $this->current += 3;
00830             break;
00831           }
00832 
00833           if ($this->StringAt($this->original, $this->current, 3,
00834                      array("TIA","TCH"))) {
00835             $this->primary   .= "X";
00836             $this->secondary .= "X";
00837             $this->current += 3;
00838             break;
00839           }
00840 
00841           if ($this->StringAt($this->original, $this->current, 2,
00842                      array("TH"))
00843               || $this->StringAt($this->original, $this->current, 3,
00844                             array("TTH"))) {
00845             // special case 'thomas', 'thames' or germanic
00846             if ($this->StringAt($this->original, $this->current + 2, 2,
00847                        array("OM","AM"))
00848                 || $this->StringAt($this->original, 0, 4, array("VAN ","VON "))
00849                 || $this->StringAt($this->original, 0, 3, array("SCH"))) {
00850               $this->primary   .= "T";
00851               $this->secondary .= "T";
00852             } else {
00853               $this->primary   .= "0";
00854               $this->secondary .= "T";
00855             }
00856             $this->current += 2;
00857             break;
00858           }
00859 
00860           if ($this->StringAt($this->original, $this->current + 1, 1,
00861                      array("T","D")))
00862             $this->current += 2;
00863           else
00864             $this->current += 1;
00865           $this->primary   .= "T";
00866           $this->secondary .= "T";
00867           break;
00868 
00869         case 'V':
00870           if (substr($this->original, $this->current + 1, 1) == 'V')
00871             $this->current += 2;
00872           else
00873             $this->current += 1;
00874           $this->primary   .= "F";
00875           $this->secondary .= "F";
00876           break;
00877 
00878         case 'W':
00879           // can also be in middle of word
00880           if ($this->StringAt($this->original, $this->current, 2, array("WR"))) {
00881             $this->primary   .= "R";
00882             $this->secondary .= "R";
00883             $this->current += 2;
00884             break;
00885           }
00886 
00887           if (($this->current == 0)
00888               && ($this->IsVowel($this->original, $this->current + 1)
00889                   || $this->StringAt($this->original, $this->current, 2,
00890                             array("WH")))) {
00891             // Wasserman should match Vasserman
00892             if ($this->IsVowel($this->original, $this->current + 1)) {
00893               $this->primary   .= "A";
00894               $this->secondary .= "F";
00895             } else {
00896               // need Uomo to match Womo
00897               $this->primary   .= "A";
00898               $this->secondary .= "A";
00899             }
00900           }
00901 
00902           // Arnow should match Arnoff
00903           if ((($this->current == $this->last)
00904                 && $this->IsVowel($this->original, $this->current - 1))
00905               || $this->StringAt($this->original, $this->current - 1, 5,
00906                         array("EWSKI","EWSKY","OWSKI","OWSKY"))
00907               || $this->StringAt($this->original, 0, 3, array("SCH"))) {
00908             $this->primary   .= "";
00909             $this->secondary .= "F";
00910             $this->current += 1;
00911             break;
00912           }
00913 
00914           // polish e.g. 'filipowicz'
00915           if ($this->StringAt($this->original, $this->current, 4,
00916                      array("WICZ","WITZ"))) {
00917             $this->primary   .= "TS";
00918             $this->secondary .= "FX";
00919             $this->current += 4;
00920             break;
00921           }
00922 
00923           // else skip it
00924           $this->current += 1;
00925           break;
00926 
00927         case 'X':
00928           // french e.g. breaux
00929           if (!(($this->current == $this->last)
00930                 && ($this->StringAt($this->original, $this->current - 3, 3,
00931                            array("IAU", "EAU"))
00932                  || $this->StringAt($this->original, $this->current - 2, 2,
00933                            array("AU", "OU"))))) {
00934             $this->primary   .= "KS";
00935             $this->secondary .= "KS";
00936           }
00937 
00938           if ($this->StringAt($this->original, $this->current + 1, 1,
00939                      array("C","X")))
00940             $this->current += 2;
00941           else
00942             $this->current += 1;
00943           break;
00944 
00945         case 'Z':
00946           // chinese pinyin e.g. 'zhao'
00947           if (substr($this->original, $this->current + 1, 1) == "H") {
00948             $this->primary   .= "J";
00949             $this->secondary .= "J";
00950             $this->current += 2;
00951             break;
00952           } elseif ($this->StringAt($this->original, $this->current + 1, 2,
00953                            array("ZO", "ZI", "ZA"))
00954                     || ($this->SlavoGermanic($this->original)
00955                         && (($this->current > 0)
00956                             && substr($this->original, $this->current - 1, 1) != 'T'))) {
00957             $this->primary   .= "S";
00958             $this->secondary .= "TS";
00959           } else {
00960             $this->primary   .= "S";
00961             $this->secondary .= "S";
00962           }
00963 
00964           if (substr($this->original, $this->current + 1, 1) == 'Z')
00965             $this->current += 2;
00966           else
00967             $this->current += 1;
00968           break;
00969 
00970         default:
00971           $this->current += 1;
00972 
00973       } // end switch
00974 
00975     // printf("<br>ORIGINAL:    '%s'\n", $this->original);
00976     // printf("<br>current:    '%s'\n", $this->current);
00977     // printf("<br>  PRIMARY:   '%s'\n", $this->primary);
00978     // printf("<br>  SECONDARY: '%s'\n", $this->secondary);
00979 
00980     } // end while
00981 
00982     $this->primary   = substr($this->primary,   0, 4);
00983     $this->secondary = substr($this->secondary, 0, 4);
00984 
00985     $result["primary"] = $this->primary ;
00986     $result["secondary"] = $this->secondary ;
00987 
00988     return $result ;
00989 
00990   } // end of function MetaPhone
00991 
00992 
00993   // Private methods
00994 
/**
 * Tests whether the piece of $string that begins at offset $start and is
 * at most $length bytes long is exactly equal to one of the candidates
 * in $list.
 *
 * @param string $string  Text to inspect.
 * @param int    $start   Zero-based offset of the window. An offset that is
 *                        negative or at/after the end of $string never matches.
 * @param int    $length  Window width handed to substr(); the window is
 *                        shorter when it would run past the end of $string.
 * @param array  $list    Candidate strings to compare with the window.
 * @return int 1 when the window equals one of the candidates, 0 otherwise.
 */
function StringAt($string, $start, $length, $list) {
  if (($start < 0) || ($start >= strlen($string)))
    return 0;

  // Cut the window once instead of once per candidate. The strict search
  // keeps numeric-looking strings such as "1E1" and "10" from being
  // treated as equal, which the loose == comparison would do.
  $window = substr($string, $start, $length);

  return in_array($window, $list, true) ? 1 : 0;
}
01005 
/**
 * Reports whether the single character at offset $pos of $string is one of
 * the upper-case letters A, E, I, O, U or Y.
 *
 * @param string $string  Text to inspect.
 * @param int    $pos     Zero-based offset of the character to test.
 * @return bool true when that character is one of AEIOUY; false otherwise,
 *              including when $pos lies outside the string.
 */
function IsVowel($string, $pos) {
  // An offset outside the string never names a vowel. Without this guard a
  // negative $pos would make substr() count from the end of the string and
  // test an unrelated character.
  if (($pos < 0) || ($pos >= strlen($string)))
    return false;

  // ereg() was removed in PHP 7.0; preg_match() with the same character
  // class is the PCRE replacement.
  return preg_match('/[AEIOUY]/', substr($string, $pos, 1)) === 1;
}
01016 
/**
 * Reports whether $string contains any of the markers "W", "K", "CZ" or
 * "WITZ" anywhere in it. The match is case-sensitive, so only upper-case
 * occurrences count.
 *
 * @param string $string  Text to scan.
 * @return bool true when at least one marker occurs in $string, false otherwise.
 */
function SlavoGermanic($string) {
  // ereg() was removed in PHP 7.0; preg_match() with the same alternation
  // is the PCRE replacement.
  return preg_match('/W|K|CZ|WITZ/', $string) === 1;
}
01026 } // end of class MetaPhone
01027 ?>


Généré par TYPO3 Ameos avec  doxygen 1.4.6