Documentation TYPO3 par Ameos |
<?php
// VERSION DoubleMetaphone Class 1.01
//
// DESCRIPTION
//
//   This class implements a "sounds like" algorithm developed
//   by Lawrence Philips which he published in the June, 2000 issue
//   of C/C++ Users Journal.  Double Metaphone is an improved
//   version of Philips' original Metaphone algorithm.
//
// COPYRIGHT
//
//   Copyright 2001, Stephen Woodbridge <woodbri@swoodbridge.com>
//   All rights reserved.
//
//   http://swoodbridge.com/DoubleMetaPhone/
//
//   This PHP translation is based heavily on the C implementation
//   by Maurice Aubrey <maurice@hevanet.com>, which in turn
//   is based heavily on the C++ implementation by
//   Lawrence Philips and incorporates several bug fixes courtesy
//   of Kevin Atkinson <kevina@users.sourceforge.net>.
//
//   This module is free software; you may redistribute it and/or
//   modify it under the same terms as Perl itself.
//
// CONTRIBUTIONS
//
//   17-May-2002 Geoff Caplan  http://www.advantae.com
//     Bug fix: added code to return class object which I forgot to do
//     Created a functional callable version instead of the class version
//     which is faster if you are calling this a lot.
//
// ------------------------------------------------------------------

// TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file:
// TYPO3:   $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example.

/**
 * Double Metaphone ("sounds like") encoder.
 *
 * For an input word the encoder produces two phonetic keys of at most four
 * characters each: a primary key and a secondary (alternate pronunciation)
 * key. The word is upper-cased and scanned byte by byte; each letter appends
 * zero or more code letters to both keys depending on its neighbours.
 *
 * The object keeps the state of the most recent encoding in its properties,
 * so a single instance can be reused for many words but each call overwrites
 * the previous result.
 */
class user_DoubleMetaPhone
{
    // properties

    var $original  = "";   // upper-cased input with one trailing space appended
    var $primary   = "";   // primary key built so far
    var $secondary = "";   // secondary key built so far
    var $length    = 0;    // byte length of the input (without the trailing space)
    var $last      = 0;    // index of the last input character ($length - 1)
    var $current   = 0;    // index of the character currently being examined

    // methods

    // TYPO3 specific API to this class. BEGIN
    /**
     * Returns only the primary Double Metaphone key of $string.
     *
     * @param string  $string            Word to encode.
     * @param integer $sys_language_uid  Accepted for the hook signature; not used here.
     * @return string Primary key (0 to 4 characters).
     */
    function metaphone($string, $sys_language_uid = 0) {
        $res = $this->DoubleMetaPhone($string);
        return $res['primary'];
    }
    // TYPO3 specific API to this class. END


    // Public method

    /**
     * Computes the primary and secondary Double Metaphone keys of $string.
     *
     * @param string $string Word to encode.
     * @return array Array with the string entries 'primary' and 'secondary',
     *               each truncated to at most four characters.
     */
    function DoubleMetaPhone($string) {
        $string = (string) $string;

        $this->primary   = "";
        $this->secondary = "";
        $this->current   = 0;
        $this->length    = strlen($string);
        $this->last      = $this->length - 1;
        // The trailing space lets the look-ahead rules test for "end of word".
        $this->original  = strtoupper($string . " ");

        // skip this at beginning of word
        if ($this->_at(0, 2, array('GN', 'KN', 'PN', 'WR', 'PS'))) {
            $this->current++;
        }

        // Initial 'X' is pronounced 'Z' e.g. 'Xavier'
        if ($this->_charAt(0) == 'X') {
            $this->_add("S", "S");   // 'Z' maps to 'S'
            $this->current++;
        }

        // main loop: stop once BOTH keys have reached four characters.
        // (The previous condition read strlen($this->secondary < 4), i.e. the
        // length of a boolean, so the cut-off never took effect and every
        // word was scanned to its end.)
        while (strlen($this->primary) < 4 || strlen($this->secondary) < 4) {
            if ($this->current >= $this->length) {
                break;
            }

            // Position of the letter handled in this iteration. Every branch
            // below reads its context through $c before it advances
            // $this->current.
            $c = $this->current;

            switch ($this->_charAt($c)) {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    if ($c == 0) {
                        // all init vowels now map to 'A'
                        $this->_add('A', 'A');
                    }
                    $this->current += 1;
                    break;

                case 'B':
                    // '-mb', e.g. "dumb", already skipped over ...
                    $this->_add('P', 'P');
                    $this->current += ($this->_charAt($c + 1) == 'B') ? 2 : 1;
                    break;

                // NOTE(review): the scan is byte-wise, so this label can only
                // match when this file and the input use a single-byte
                // encoding (e.g. ISO-8859-1) - confirm the file encoding.
                case 'Ç':
                    $this->_add('S', 'S');
                    $this->current += 1;
                    break;

                case 'C':
                    // various germanic
                    if (($c > 1)
                        && !$this->IsVowel($this->original, $c - 2)
                        && $this->_at($c - 1, 3, array("ACH"))
                        && (($this->_charAt($c + 2) != 'I')
                            && (($this->_charAt($c + 2) != 'E')
                                || $this->_at($c - 2, 6, array("BACHER", "MACHER"))))) {
                        $this->_add('K', 'K');
                        $this->current += 2;
                        break;
                    }

                    // special case 'caesar'
                    if (($c == 0) && $this->_at($c, 6, array("CAESAR"))) {
                        $this->_add('S', 'S');
                        $this->current += 2;
                        break;
                    }

                    // italian 'chianti'
                    if ($this->_at($c, 4, array("CHIA"))) {
                        $this->_add('K', 'K');
                        $this->current += 2;
                        break;
                    }

                    if ($this->_at($c, 2, array("CH"))) {
                        // find 'michael'
                        if (($c > 0) && $this->_at($c, 4, array("CHAE"))) {
                            $this->_add('K', 'X');
                            $this->current += 2;
                            break;
                        }

                        // greek roots e.g. 'chemistry', 'chorus'
                        if (($c == 0)
                            && ($this->_at($c + 1, 5, array("HARAC", "HARIS"))
                                || $this->_at($c + 1, 3, array("HOR", "HYM", "HIA", "HEM")))
                            && !$this->_at(0, 5, array("CHORE"))) {
                            $this->_add('K', 'K');
                            $this->current += 2;
                            break;
                        }

                        // germanic, greek, or otherwise 'ch' for 'kh' sound
                        if (($this->_at(0, 4, array("VAN ", "VON "))
                                || $this->_at(0, 3, array("SCH")))
                            // 'architect' but not 'arch', orchestra', 'orchid'
                            || $this->_at($c - 2, 6, array("ORCHES", "ARCHIT", "ORCHID"))
                            || $this->_at($c + 2, 1, array("T", "S"))
                            || (($this->_at($c - 1, 1, array("A", "O", "U", "E"))
                                    || ($c == 0))
                                // e.g. 'wachtler', 'weschsler', but not 'tichner'
                                && $this->_at($c + 2, 1,
                                    array("L", "R", "N", "M", "B", "H", "F", "V", "W", " ")))) {
                            $this->_add('K', 'K');
                        } else {
                            if ($c > 0) {
                                if ($this->_at(0, 2, array("MC"))) {
                                    // e.g. 'McHugh'
                                    $this->_add('K', 'K');
                                } else {
                                    $this->_add('X', 'K');
                                }
                            } else {
                                $this->_add('X', 'X');
                            }
                        }
                        $this->current += 2;
                        break;
                    }

                    // e.g. 'czerny'
                    if ($this->_at($c, 2, array("CZ"))
                        && !$this->_at($c - 2, 4, array("WICZ"))) {
                        $this->_add('S', 'X');
                        $this->current += 2;
                        break;
                    }

                    // e.g. 'focaccia'
                    if ($this->_at($c + 1, 3, array("CIA"))) {
                        $this->_add('X', 'X');
                        $this->current += 3;
                        break;
                    }

                    // double 'C', but not McClellan'
                    if ($this->_at($c, 2, array("CC"))
                        && !(($c == 1) && ($this->_charAt(0) == 'M'))) {
                        // 'bellocchio' but not 'bacchus'
                        if ($this->_at($c + 2, 1, array("I", "E", "H"))
                            && !$this->_at($c + 2, 2, array("HU"))) {
                            // 'accident', 'accede', 'succeed'
                            if ((($c == 1) && ($this->_charAt($c - 1) == 'A'))
                                || $this->_at($c - 1, 5, array("UCCEE", "UCCES"))) {
                                $this->_add("KS", "KS");
                            } else {
                                // 'bacci', 'bertucci', other italian
                                $this->_add("X", "X");
                            }
                            $this->current += 3;
                            break;
                        } else {
                            // Pierce's rule
                            $this->_add("K", "K");
                            $this->current += 2;
                            break;
                        }
                    }

                    if ($this->_at($c, 2, array("CK", "CG", "CQ"))) {
                        $this->_add("K", "K");
                        $this->current += 2;
                        break;
                    }

                    if ($this->_at($c, 2, array("CI", "CE", "CY"))) {
                        // italian vs. english
                        if ($this->_at($c, 3, array("CIO", "CIE", "CIA"))) {
                            $this->_add("S", "X");
                        } else {
                            $this->_add("S", "S");
                        }
                        $this->current += 2;
                        break;
                    }

                    // else
                    $this->_add("K", "K");

                    // name sent in 'mac caffrey', 'mac gregor'
                    if ($this->_at($c + 1, 2, array(" C", " Q", " G"))) {
                        $this->current += 3;
                    } elseif ($this->_at($c + 1, 1, array("C", "K", "Q"))
                        && !$this->_at($c + 1, 2, array("CE", "CI"))) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'D':
                    if ($this->_at($c, 2, array("DG"))) {
                        if ($this->_at($c + 2, 1, array("I", "E", "Y"))) {
                            // e.g. 'edge'
                            $this->_add("J", "J");
                            $this->current += 3;
                        } else {
                            // e.g. 'edgar'
                            $this->_add("TK", "TK");
                            $this->current += 2;
                        }
                        break;
                    }

                    // 'DT' and 'DD' collapse to a single 'T'
                    $this->_add("T", "T");
                    $this->current += $this->_at($c, 2, array("DT", "DD")) ? 2 : 1;
                    break;

                case 'F':
                    $this->current += ($this->_charAt($c + 1) == 'F') ? 2 : 1;
                    $this->_add("F", "F");
                    break;

                case 'G':
                    if ($this->_charAt($c + 1) == 'H') {
                        if (($c > 0) && !$this->IsVowel($this->original, $c - 1)) {
                            $this->_add("K", "K");
                            $this->current += 2;
                            break;
                        }

                        // 'ghislane', 'ghiradelli'
                        if ($c == 0) {
                            if ($this->_charAt($c + 2) == 'I') {
                                $this->_add("J", "J");
                            } else {
                                $this->_add("K", "K");
                            }
                            $this->current += 2;
                            break;
                        }

                        // Parker's rule (with some further refinements) - e.g. 'hugh'
                        if ((($c > 1) && $this->_at($c - 2, 1, array("B", "H", "D")))
                            // e.g. 'bough'
                            || (($c > 2) && $this->_at($c - 3, 1, array("B", "H", "D")))
                            // e.g. 'broughton'
                            || (($c > 3) && $this->_at($c - 4, 1, array("B", "H")))) {
                            $this->current += 2;
                            break;
                        }

                        // e.g. 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if (($c > 2)
                            && ($this->_charAt($c - 1) == 'U')
                            && $this->_at($c - 3, 1, array("C", "G", "L", "R", "T"))) {
                            $this->_add("F", "F");
                        } elseif (($c > 0) && $this->_charAt($c - 1) != 'I') {
                            $this->_add("K", "K");
                        }
                        $this->current += 2;
                        break;
                    }

                    if ($this->_charAt($c + 1) == 'N') {
                        if (($c == 1) && $this->IsVowel($this->original, 0)
                            && !$this->SlavoGermanic($this->original)) {
                            $this->_add("KN", "N");
                        } else {
                            // not e.g. 'cagney'
                            // The middle test mirrors the C reference
                            // (character at current + 1 is not 'Y'); inside
                            // this branch that character is always 'N', so it
                            // never fails. The earlier PHP text omitted the
                            // substr() length, which was likewise always true.
                            if (!$this->_at($c + 2, 2, array("EY"))
                                && ($this->_charAt($c + 1) != 'Y')
                                && !$this->SlavoGermanic($this->original)) {
                                $this->_add("N", "KN");
                            } else {
                                $this->_add("KN", "KN");
                            }
                        }
                        $this->current += 2;
                        break;
                    }

                    // 'tagliaro'
                    if ($this->_at($c + 1, 2, array("LI"))
                        && !$this->SlavoGermanic($this->original)) {
                        $this->_add("KL", "L");
                        $this->current += 2;
                        break;
                    }

                    // -ges-, -gep-, -gel- at beginning
                    if (($c == 0)
                        && (($this->_charAt($c + 1) == 'Y')
                            || $this->_at($c + 1, 2,
                                array("ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE",
                                      "EI", "ER")))) {
                        $this->_add("K", "J");
                        $this->current += 2;
                        break;
                    }

                    // -ger-, -gy-
                    if (($this->_at($c + 1, 2, array("ER"))
                            || ($this->_charAt($c + 1) == 'Y'))
                        && !$this->_at(0, 6, array("DANGER", "RANGER", "MANGER"))
                        && !$this->_at($c - 1, 1, array("E", "I"))
                        && !$this->_at($c - 1, 3, array("RGY", "OGY"))) {
                        $this->_add("K", "J");
                        $this->current += 2;
                        break;
                    }

                    // italian e.g. 'biaggi'
                    if ($this->_at($c + 1, 1, array("E", "I", "Y"))
                        || $this->_at($c - 1, 4, array("AGGI", "OGGI"))) {
                        // obvious germanic
                        if (($this->_at(0, 4, array("VAN ", "VON "))
                                || $this->_at(0, 3, array("SCH")))
                            || $this->_at($c + 1, 2, array("ET"))) {
                            $this->_add("K", "K");
                        } elseif ($this->_at($c + 1, 4, array("IER "))) {
                            // always soft if french ending
                            $this->_add("J", "J");
                        } else {
                            $this->_add("J", "K");
                        }
                        $this->current += 2;
                        break;
                    }

                    $this->current += ($this->_charAt($c + 1) == 'G') ? 2 : 1;
                    $this->_add('K', 'K');
                    break;

                case 'H':
                    // only keep if first & before vowel or btw. 2 vowels
                    if ((($c == 0) || $this->IsVowel($this->original, $c - 1))
                        && $this->IsVowel($this->original, $c + 1)) {
                        $this->_add('H', 'H');
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    break;

                case 'J':
                    // obvious spanish, 'jose', 'san jacinto'
                    if ($this->_at($c, 4, array("JOSE"))
                        || $this->_at(0, 4, array("SAN "))) {
                        if ((($c == 0) && ($this->_charAt($c + 4) == ' '))
                            || $this->_at(0, 4, array("SAN "))) {
                            $this->_add('H', 'H');
                        } else {
                            $this->_add("J", 'H');
                        }
                        $this->current += 1;
                        break;
                    }

                    if (($c == 0) && !$this->_at($c, 4, array("JOSE"))) {
                        $this->_add('J', 'A');   // Yankelovich/Jankelowicz
                    } elseif ($this->IsVowel($this->original, $c - 1)
                        && !$this->SlavoGermanic($this->original)
                        && (($this->_charAt($c + 1) == 'A')
                            || ($this->_charAt($c + 1) == 'O'))) {
                        // spanish pron. of .e.g. 'bajador'
                        $this->_add("J", "H");
                    } elseif ($c == $this->last) {
                        $this->_add("J", "");
                    } elseif (!$this->_at($c + 1, 1,
                                  array("L", "T", "K", "S", "N", "M", "B", "Z"))
                        && !$this->_at($c - 1, 1, array("S", "K", "L"))) {
                        $this->_add("J", "J");
                    }

                    // it could happen
                    $this->current += ($this->_charAt($c + 1) == 'J') ? 2 : 1;
                    break;

                case 'K':
                    $this->current += ($this->_charAt($c + 1) == 'K') ? 2 : 1;
                    $this->_add("K", "K");
                    break;

                case 'L':
                    if ($this->_charAt($c + 1) == 'L') {
                        // spanish e.g. 'cabrillo', 'gallegos'
                        if ((($c == ($this->length - 3))
                                && $this->_at($c - 1, 4, array("ILLO", "ILLA", "ALLE")))
                            || (($this->_at($this->last - 1, 2, array("AS", "OS"))
                                    || $this->_at($this->last, 1, array("A", "O")))
                                && $this->_at($c - 1, 4, array("ALLE")))) {
                            $this->_add("L", "");
                            $this->current += 2;
                            break;
                        }
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->_add("L", "L");
                    break;

                case 'M':
                    if (($this->_at($c - 1, 3, array("UMB"))
                            && ((($c + 1) == $this->last)
                                || $this->_at($c + 2, 2, array("ER"))))
                        // 'dumb', 'thumb'
                        || ($this->_charAt($c + 1) == 'M')) {
                        $this->current += 2;
                    } else {
                        $this->current += 1;
                    }
                    $this->_add("M", "M");
                    break;

                case 'N':
                    $this->current += ($this->_charAt($c + 1) == 'N') ? 2 : 1;
                    $this->_add("N", "N");
                    break;

                // NOTE(review): same single-byte encoding caveat as the
                // cedilla case above - confirm the file encoding.
                case 'Ñ':
                    $this->current += 1;
                    $this->_add("N", "N");
                    break;

                case 'P':
                    if ($this->_charAt($c + 1) == 'H') {
                        $this->current += 2;
                        $this->_add("F", "F");
                        break;
                    }

                    // also account for "campbell" and "raspberry"
                    $this->current += $this->_at($c + 1, 1, array("P", "B")) ? 2 : 1;
                    $this->_add("P", "P");
                    break;

                case 'Q':
                    $this->current += ($this->_charAt($c + 1) == 'Q') ? 2 : 1;
                    $this->_add("K", "K");
                    break;

                case 'R':
                    // french e.g. 'rogier', but exclude 'hochmeier'
                    if (($c == $this->last)
                        && !$this->SlavoGermanic($this->original)
                        && $this->_at($c - 2, 2, array("IE"))
                        && !$this->_at($c - 4, 2, array("ME", "MA"))) {
                        $this->_add("", "R");
                    } else {
                        $this->_add("R", "R");
                    }
                    $this->current += ($this->_charAt($c + 1) == 'R') ? 2 : 1;
                    break;

                case 'S':
                    // special cases 'island', 'isle', 'carlisle', 'carlysle'
                    if ($this->_at($c - 1, 3, array("ISL", "YSL"))) {
                        $this->current += 1;
                        break;
                    }

                    // special case 'sugar-'
                    if (($c == 0) && $this->_at($c, 5, array("SUGAR"))) {
                        $this->_add("X", "S");
                        $this->current += 1;
                        break;
                    }

                    if ($this->_at($c, 2, array("SH"))) {
                        // germanic
                        if ($this->_at($c + 1, 4, array("HEIM", "HOEK", "HOLM", "HOLZ"))) {
                            $this->_add("S", "S");
                        } else {
                            $this->_add("X", "X");
                        }
                        $this->current += 2;
                        break;
                    }

                    // italian & armenian
                    if ($this->_at($c, 3, array("SIO", "SIA"))
                        || $this->_at($c, 4, array("SIAN"))) {
                        if (!$this->SlavoGermanic($this->original)) {
                            $this->_add("S", "X");
                        } else {
                            $this->_add("S", "S");
                        }
                        $this->current += 3;
                        break;
                    }

                    // german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                    // also, -sz- in slavic language altho in hungarian it is pronounced 's'
                    if ((($c == 0) && $this->_at($c + 1, 1, array("M", "N", "L", "W")))
                        || $this->_at($c + 1, 1, array("Z"))) {
                        $this->_add("S", "X");
                        $this->current += $this->_at($c + 1, 1, array("Z")) ? 2 : 1;
                        break;
                    }

                    if ($this->_at($c, 2, array("SC"))) {
                        // Schlesinger's rule
                        if ($this->_charAt($c + 2) == 'H') {
                            // dutch origin, e.g. 'school', 'schooner'
                            if ($this->_at($c + 3, 2,
                                    array("OO", "ER", "EN", "UY", "ED", "EM"))) {
                                // 'schermerhorn', 'schenker'
                                if ($this->_at($c + 3, 2, array("ER", "EN"))) {
                                    $this->_add("X", "SK");
                                } else {
                                    $this->_add("SK", "SK");
                                }
                            } elseif (($c == 0)
                                && !$this->IsVowel($this->original, 3)
                                && ($this->_charAt($c + 3) != 'W')) {
                                $this->_add("X", "S");
                            } else {
                                $this->_add("X", "X");
                            }
                            $this->current += 3;
                            break;
                        }

                        if ($this->_at($c + 2, 1, array("I", "E", "Y"))) {
                            $this->_add("S", "S");
                        } else {
                            $this->_add("SK", "SK");
                        }
                        $this->current += 3;
                        break;
                    }

                    // french e.g. 'resnais', 'artois'
                    if (($c == $this->last)
                        && $this->_at($c - 2, 2, array("AI", "OI"))) {
                        $this->_add("", "S");
                    } else {
                        $this->_add("S", "S");
                    }

                    $this->current += $this->_at($c + 1, 1, array("S", "Z")) ? 2 : 1;
                    break;

                case 'T':
                    if ($this->_at($c, 4, array("TION"))
                        || $this->_at($c, 3, array("TIA", "TCH"))) {
                        $this->_add("X", "X");
                        $this->current += 3;
                        break;
                    }

                    if ($this->_at($c, 2, array("TH"))
                        || $this->_at($c, 3, array("TTH"))) {
                        // special case 'thomas', 'thames' or germanic
                        if ($this->_at($c + 2, 2, array("OM", "AM"))
                            || $this->_at(0, 4, array("VAN ", "VON "))
                            || $this->_at(0, 3, array("SCH"))) {
                            $this->_add("T", "T");
                        } else {
                            // "0" (zero) is the code used for the 'th' sound
                            $this->_add("0", "T");
                        }
                        $this->current += 2;
                        break;
                    }

                    $this->current += $this->_at($c + 1, 1, array("T", "D")) ? 2 : 1;
                    $this->_add("T", "T");
                    break;

                case 'V':
                    $this->current += ($this->_charAt($c + 1) == 'V') ? 2 : 1;
                    $this->_add("F", "F");
                    break;

                case 'W':
                    // can also be in middle of word
                    if ($this->_at($c, 2, array("WR"))) {
                        $this->_add("R", "R");
                        $this->current += 2;
                        break;
                    }

                    if (($c == 0)
                        && ($this->IsVowel($this->original, $c + 1)
                            || $this->_at($c, 2, array("WH")))) {
                        if ($this->IsVowel($this->original, $c + 1)) {
                            // Wasserman should match Vasserman
                            $this->_add("A", "F");
                        } else {
                            // need Uomo to match Womo
                            $this->_add("A", "A");
                        }
                        // no break: the rules below are still evaluated
                    }

                    // Arnow should match Arnoff
                    if ((($c == $this->last) && $this->IsVowel($this->original, $c - 1))
                        || $this->_at($c - 1, 5,
                            array("EWSKI", "EWSKY", "OWSKI", "OWSKY"))
                        || $this->_at(0, 3, array("SCH"))) {
                        $this->_add("", "F");
                        $this->current += 1;
                        break;
                    }

                    // polish e.g. 'filipowicz'
                    if ($this->_at($c, 4, array("WICZ", "WITZ"))) {
                        $this->_add("TS", "FX");
                        $this->current += 4;
                        break;
                    }

                    // else skip it
                    $this->current += 1;
                    break;

                case 'X':
                    // french e.g. breaux
                    if (!(($c == $this->last)
                        && ($this->_at($c - 3, 3, array("IAU", "EAU"))
                            || $this->_at($c - 2, 2, array("AU", "OU"))))) {
                        $this->_add("KS", "KS");
                    }

                    $this->current += $this->_at($c + 1, 1, array("C", "X")) ? 2 : 1;
                    break;

                case 'Z':
                    // chinese pinyin e.g. 'zhao'
                    if ($this->_charAt($c + 1) == "H") {
                        $this->_add("J", "J");
                        $this->current += 2;
                        break;
                    } elseif ($this->_at($c + 1, 2, array("ZO", "ZI", "ZA"))
                        || ($this->SlavoGermanic($this->original)
                            && (($c > 0) && $this->_charAt($c - 1) != 'T'))) {
                        $this->_add("S", "TS");
                    } else {
                        $this->_add("S", "S");
                    }

                    $this->current += ($this->_charAt($c + 1) == 'Z') ? 2 : 1;
                    break;

                default:
                    // any other byte (digits, blanks, punctuation, ...) is skipped
                    $this->current += 1;

            } // end switch

        } // end while

        // Rules may append two code letters at once, so clip to four. The
        // casts keep the result a string on PHP versions where substr()
        // returns false for an empty subject.
        $this->primary   = (string) substr($this->primary, 0, 4);
        $this->secondary = (string) substr($this->secondary, 0, 4);

        $result = array();
        $result["primary"]   = $this->primary;
        $result["secondary"] = $this->secondary;

        return $result;

    } // end of function MetaPhone


    // Private methods

    /**
     * Tests whether the $length bytes of $string starting at $start equal
     * one of the candidates in $list.
     *
     * @param string  $string Subject string.
     * @param integer $start  Start offset; negative or past-the-end offsets never match.
     * @param integer $length Number of bytes to compare.
     * @param array   $list   Candidate strings.
     * @return integer 1 on a match, 0 otherwise.
     */
    function StringAt($string, $start, $length, $list) {
        if (($start < 0) || ($start >= strlen($string))) {
            return 0;
        }

        // Slice once instead of once per candidate.
        $segment = substr($string, $start, $length);

        return in_array($segment, $list, true) ? 1 : 0;
    }

    /**
     * Tests whether the character of $string at $pos is one of A, E, I, O, U, Y.
     *
     * Uses preg_match() because ereg() no longer exists as of PHP 7.
     *
     * @param string  $string Upper-case subject string.
     * @param integer $pos    Offset of the character to test.
     * @return boolean
     */
    function IsVowel($string, $pos) {
        return preg_match('/[AEIOUY]/', (string) substr($string, $pos, 1)) === 1;
    }

    /**
     * Tests whether $string contains 'W', 'K', 'CZ' or 'WITZ' anywhere,
     * which the rules above treat as a hint for a Slavic or Germanic word.
     *
     * Uses preg_match() because ereg() no longer exists as of PHP 7.
     *
     * @param string $string Upper-case subject string.
     * @return boolean
     */
    function SlavoGermanic($string) {
        return preg_match('/W|K|CZ|WITZ/', (string) $string) === 1;
    }

    /**
     * Appends code letters to the primary and secondary keys.
     *
     * @param string $primary   Letters for the primary key (may be empty).
     * @param string $secondary Letters for the secondary key (may be empty).
     * @return void
     */
    function _add($primary, $secondary) {
        $this->primary   .= $primary;
        $this->secondary .= $secondary;
    }

    /**
     * Returns the single byte of the current word at $pos.
     *
     * @param integer $pos Offset into $this->original.
     * @return string|false Whatever substr() yields for that offset.
     */
    function _charAt($pos) {
        return substr($this->original, $pos, 1);
    }

    /**
     * Shorthand for StringAt() applied to the current word.
     *
     * @param integer $start  Start offset into $this->original.
     * @param integer $length Number of bytes to compare.
     * @param array   $list   Candidate strings.
     * @return integer 1 on a match, 0 otherwise.
     */
    function _at($start, $length, $list) {
        return $this->StringAt($this->original, $start, $length, $list);
    }
} // end of class MetaPhone