Use PHP functions in JavaScript

JavaScript metaphone

Break English phrases down into their phonemes

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
5051
52
53
54
5556
57
58
59
6061
62
63
64
6566
67
68
69
7071
72
73
74
7576
77
78
79
8081
82
83
84
8586
87
88
89
9091
92
93
94
9596
97
98
99
100101
102
103
104
105106
107
108
109
110111
112
113
114
115116
117
118
119
120121
122
123
124
125126
127
128
129
130131
132
133
134
135136
137
138
139
140141
142
143
144
145146
147
148
149
150151
152
153
154
155156
157
158
159
160161
162
163
164
165166
167
168
169
170171
172
173
174
175176
177
178
179
180181
182
183
184
185186
187
188
189
190191
192
193
194
195196
197
198
199
200201
202
203
204
205206
207
208
209
210211
212
213
214
215216
217
218
219
220221
222
223
224
225226
227
228
229
230231
232
233
234
235236
237
238
239
240241
242
243
244
245246
247
function metaphone (word, phones) {
    // Computes a Metaphone-style phonetic key for an English word or phrase,
    // modelled on PHP's metaphone().
    //
    // word   - value to encode; null/undefined are treated as '' and anything
    //          else is converted to a string and upper-cased.
    // phones - optional maximum key length. Values that parse to a positive
    //          integer truncate the key; 0, omitted or non-numeric values mean
    //          "no limit" (0 is PHP's documented default for no restriction).
    // Returns the phonetic key as a string ('0' stands for the "th" sound).
    //
    // +   original by: Greg Frazier
    // +   improved by: Brett Zamir (http://brett-zamir.me)
    // +   improved by: Rafał Kukawski (http://kukawski.pl)
    // *     example 1: metaphone('Gnu');
    // *     returns 1: 'N'
    // *     example 2: metaphone('What');
    // *     returns 2: 'WT'
    // *     example 3: metaphone('Cat', 1);
    // *     returns 3: 'K'

    // Membership test that is safe for "no character": ''.indexOf-style
    // lookups return 0 for the empty string, which would otherwise make a
    // missing neighbour look like a match.
    function isOneOf (set, ch) {
        return ch !== '' && set.indexOf(ch) !== -1;
    }

    function isVowel (a) {
        return isOneOf('AEIOU', a);
    }

    // Collapses runs of identical adjacent letters, except for C and G whose
    // doubled forms are handled by the rules below.
    function removeDuplicates (str) {
        var prev = str.charAt(0),
            rebuilt = prev,
            cur,
            i;

        for (i = 1; i < str.length; i++) {
            cur = str.charAt(i);
            if (cur !== prev || cur === 'C' || cur === 'G') { // 'c' and 'g' are exceptions
                rebuilt += cur;
            }
            prev = cur;
        }

        return rebuilt;
    }

    word = removeDuplicates((word == null ? '' : word + '').toUpperCase());

    // Special wh- case: drop the "h" so the word is coded as if it began with "W"
    if (word.substr(0, 2) === 'WH') {
        word = 'W' + word.substr(2);
    }

    // Length is taken AFTER the wh- rewrite so the end-of-word tests stay accurate
    var wordlength = word.length,
        last = wordlength - 1,
        x = 0,
        metaword = '';

    var cc = word.charAt(0); // current char
    var pc = '';             // previous char. There is none when x === 0
    var nc = word.charAt(1); // next char ('' when there is none)
    var nnc = '';            // 2 characters ahead ('' when there is none)

    // Word-initial exceptions
    if (1 <= wordlength) {
        switch (cc) {
        case 'A':
            // "AE-" is coded as E, any other leading A stays A
            metaword += (nc === 'E') ? 'E' : 'A';
            x += 1;
            break;
        case 'E': case 'I': case 'O': case 'U':
            // A leading vowel is kept; vowels elsewhere are dropped
            metaword += cc;
            x += 1;
            break;
        case 'G': case 'K': case 'P':
            // GN-, KN-, PN-: the first letter is silent
            if (nc === 'N') {
                x += 1;
            }
            break;
        case 'W':
            // WR-: the W is silent
            if (nc === 'R') {
                x += 1;
            }
            break;
        }
    }

    for (; x < wordlength; x++) {
        cc = word.charAt(x);
        pc = word.charAt(x - 1); // '' when x === 0
        nc = word.charAt(x + 1);
        nnc = word.charAt(x + 2);

        // Vowels (and any character without a case below) produce no output here
        switch (cc) {
        case 'B':
            if (pc !== 'M') {
                metaword += 'B';
            }
            break;
        case 'C':
            if (nc === 'H' && pc !== 'S') {
                // "CH" (but not "SCH"): K for word-initial CH + vowel, otherwise X
                metaword += (x === 0 && isVowel(nnc)) ? 'K' : 'X';
            } else if (nc === 'I' && nnc === 'A') {
                metaword += 'X';
            } else if (isOneOf('IEY', nc)) {
                // CI/CE/CY sound like S; after an S the C is silent (SCI/SCE/SCY)
                if (pc !== 'S') {
                    metaword += 'S';
                }
            } else {
                metaword += 'K';
            }
            break;
        case 'D':
            if (nc === 'G' && isOneOf('EIY', nnc)) {
                // DGE/DGI/DGY -> J; skip the letters already accounted for
                metaword += 'J';
                x += 2;
            } else {
                metaword += 'T';
            }
            break;
        case 'F':
            metaword += 'F';
            break;
        case 'G':
            // Silent in word-final -GN, -GNS and -GNED
            if (nc === 'N' && (x + 1 === last || (nnc === 'S' && x + 2 === last))) {
                break;
            }
            if (nc === 'N' && nnc === 'E' && word.charAt(x + 3) === 'D' && x + 3 === last) {
                break;
            }
            // Silent in word-final -ING
            if (x === last && pc === 'N' && word.charAt(x - 2) === 'I') {
                break;
            }
            // -OUGH- is coded as F
            if (word.charAt(x - 2) === 'O' && pc === 'U' && nc === 'H') {
                metaword += 'F';
                break;
            }
            if (nc === 'H') {
                // GH is only voiced (as K) before a vowel
                if (isVowel(nnc)) {
                    metaword += 'K';
                }
            } else if (x === last) {
                metaword += 'K';
            } else if (isOneOf('EIY', nc)) {
                // Soft G; the second G of "GG" is silent
                if (pc !== 'G') {
                    metaword += 'J';
                }
            } else {
                metaword += 'K';
            }
            break;
        case 'M': case 'J': case 'N': case 'R': case 'L':
            metaword += cc;
            break;
        case 'Q':
            metaword += 'K';
            break;
        case 'V':
            metaword += 'F';
            break;
        case 'Z':
            metaword += 'S';
            break;
        case 'X':
            metaword += (x === 0) ? 'S' : 'KS';
            break;
        case 'K':
            // K after C is silent ("CK")
            if (pc !== 'C') {
                metaword += 'K';
            }
            break;
        case 'P':
            metaword += (nc === 'H') ? 'F' : 'P';
            break;
        case 'Y':
            if (isVowel(nc)) {
                metaword += 'Y';
            }
            break;
        case 'H':
            // H is voiced only before a vowel and not after C, S, P, T or G
            if (!isOneOf('CSPTG', pc) && isVowel(nc)) {
                metaword += 'H';
            }
            break;
        case 'S':
            if (nc === 'H' || (nc === 'I' && isOneOf('AO', nnc))) {
                metaword += 'X';
            } else {
                metaword += 'S';
            }
            break;
        case 'T':
            if (nc === 'H') {
                metaword += '0'; // "th"
            } else if (nc === 'I' && isOneOf('AO', nnc)) {
                metaword += 'X';
            } else {
                metaword += 'T';
            }
            break;
        case 'W':
            if (isVowel(nc)) {
                metaword += 'W';
            }
            break;
        }
    }

    // Only a positive limit truncates; NaN (omitted/invalid) and 0 mean "no limit"
    phones = parseInt(phones, 10);
    if (phones > 0 && metaword.length > phones) {
        return metaword.substr(0, phones);
    }
    return metaword;
}
external links: original PHP docs | raw js source

Examples

Running

1
metaphone('Gnu');

Should return

1
'N'

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have metaphone goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here you are allowing us to use it in php.js hence dual licensing it under the MIT and GPL licenses

Gravatar
Lawrence Philips
4 Jun '11 Permalink

q  re: correct implementations of metaphone, specifically whether to code "CH-<vowel>" as K or X

This is in fact an incredibly complicated question, since the number of exceptions in the pronunciation of consonants and consonant groups in English is huge. These issues are only really addressed correctly in the third generation of the Metaphone algorithm, Metaphone 3.

Gravatar
Brett Zamir
6 May '11 Permalink

q  @Rey: According to the original BASIC implementation of metaphone at http://aspell.net/metaphone/metaphone.basic (though not mentioned in summaries of the algorithm such as http://en.wikipedia.org/wiki/Metaphone ), "Ch" at the beginning of a word and then followed by a vowel should produce "k". It seems to depend on what metaphone version is being targeted (see http://aspell.net/metaphone/ ). The PHP source code had a "traditional" flag, apparently to distinguish this original algorithm from others, but this was not apparently exposed anywhere in the public API.

Practically speaking, there are examples like "Character", "Charisma", and "Chyme" where "k" makes sense, but other counter-examples like "Check", "Chick", "Chinese", "Chore", etc. which might indicate otherwise (seems 'e', 'i', 'o' should have been distinguished from other vowels based on this sample anyways, though there is also "Chad").

I went with the original algorithm, thinking http://bugs.php.net/bug.php?id=48711 might end up getting PHP to follow the original.

Lines 114-118 in our code deal with this, so feel free to alter it in your own copy, as this seems up for grabs to me (though maybe we should revert to PHP behavior, allowing configuration to allow the deviation until such time as PHP itself may resolve the issue).

Gravatar
Rafał Kukawski
6 May '11 Permalink

q  @Rey: yeah, the metaphone requires a big cleanup and some bug fixing. But the PHP implementation isn't bug free, nor does it follow the metaphone "specs". There are some differences.
I did a rewrite of the function, that you can see on github https://github.com/kvz/phpjs/raw/master/_workbench/strings/metaphone.js You can check if this one suits your needs (it lacks phonemes param support, but that's easy to do). The problem is, it's difficult to test if a metaphone implementation works correctly. For many cases also the current implementation works, but as you've proven it's not bug free.
I will try to unit test that one ASAP and make it a final release.

Gravatar
Rey
5 May '11 Permalink

q  The following term returns different results on php and js:

Chick-fil-A

PHP result:
XKFL

JS:
KKFL

I'm not sure which one is correct.


Contribute a New function