Use PHP functions in JavaScript

JavaScript utf8_encode

Encodes an ISO-8859-1 string to UTF-8

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
function utf8_encode ( argString ) {
    // Encodes an ISO-8859-1 string to UTF-8
    //
    // version: 909.322
    // discuss at: http://phpjs.org/functions/utf8_encode
    // +   original by: Webtoolkit.info (http://www.webtoolkit.info/)
    // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   improved by: sowberry
    // +    tweaked by: Jack
    // +   bugfixed by: Onno Marsman
    // +   improved by: Yves Sucaet
    // +   bugfixed by: Onno Marsman
    // +   bugfixed by: Ulrich
    // *     example 1: utf8_encode('Kevin van Zonneveld');
    // *     returns 1: 'Kevin van Zonneveld'
    //
    // argString: value to encode; it is coerced to a string first.
    // Returns a string in which every character holds one UTF-8 byte value
    // (0-255). Code units below 128 are copied through unchanged.

    // Coerce to a string so numbers and other values can be encoded as well.
    var string = (argString + '');

    var utftext = "";
    // [start, end) is the pending run of plain ASCII characters that has not
    // been appended yet; copying whole runs avoids per-character string building.
    var start = 0;
    var end = 0;
    var stringl = string.length;

    for (var n = 0; n < stringl; n++) {
        var c1 = string.charCodeAt(n);
        var enc = null;

        if (c1 < 128) {
            // ASCII: just extend the pending run.
            end++;
        } else if (c1 < 2048) {
            // Two-byte sequence: 110xxxxx 10xxxxxx
            enc = String.fromCharCode((c1 >> 6) | 192, (c1 & 63) | 128);
        } else {
            // Peek at the next code unit only when c1 is a lead surrogate.
            var c2 = (c1 >= 0xD800 && c1 <= 0xDBFF && n + 1 < stringl) ? string.charCodeAt(n + 1) : 0;

            if (c2 >= 0xDC00 && c2 <= 0xDFFF) {
                // Valid surrogate pair: combine into one code point and emit a
                // single four-byte sequence instead of two three-byte ones.
                var cp = ((c1 & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000;
                enc = String.fromCharCode(
                    (cp >> 18) | 240,
                    ((cp >> 12) & 63) | 128,
                    ((cp >> 6) & 63) | 128,
                    (cp & 63) | 128
                );
                n++; // the trail surrogate has been consumed as well
            } else {
                // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                // (an unpaired surrogate is also encoded this way, as before).
                enc = String.fromCharCode((c1 >> 12) | 224, ((c1 >> 6) & 63) | 128, (c1 & 63) | 128);
            }
        }

        if (enc !== null) {
            // Flush the pending ASCII run, then the encoded bytes.
            if (end > start) {
                utftext += string.substring(start, end);
            }
            utftext += enc;
            start = end = n + 1;
        }
    }

    // Flush a trailing ASCII run.
    if (end > start) {
        utftext += string.substring(start, stringl);
    }

    return utftext;
}
external links: original PHP docs | raw js source

Examples

Running

1
utf8_encode('Kevin van Zonneveld');

Should return

1
'Kevin van Zonneveld'

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have utf8_encode goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here you are allowing us to use it in php.js hence dual licensing it under the MIT and GPL licenses

Gravatar
Keith
1 Dec '09 Permalink

q   This function will throw an exception if passed an empty string.

I think it needs to include "

1
try {} catch(e) {} return'';
" around its contents and the following line at the start:

1
if (argString == '') return '';

Gravatar
Ben Pettit
5 May '09 Permalink

q   I made a fix so this function ran correctly in adobe javascript.

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
function utf8_encode ( string ) {
    // Encodes an ISO-8859-1 string to UTF-8  
    // 
    // version: 812.316
    // discuss at: http://phpjs.org/functions/utf8_encode    // +   original by: Webtoolkit.info (http://www.webtoolkit.info/)
    // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   improved by: sowberry
    // +    tweaked by: Jack
    // +   bugfixed by: Onno Marsman    // +   improved by: Yves Sucaet
    // +   bugfixed by: Onno Marsman
    // +   adobe js by: Ben Pettit
    // *     example 1: utf8_encode('Kevin van Zonneveld');
    // *     returns 1: 'Kevin van Zonneveld'    string      =       string.valueOf(); //    <-bp:  I added this line.
    
    string = (string+'').replace(/\r\n/g, "\n").replace(/\r/g, "\n");
 
    var utftext = "";    var start, end;
    var stringl = 0;

Gravatar
Kevin van Zonneveld
14 Nov '08 Permalink

q  @ Onno Marsman: Sjeesh, it has been a long day... But that long.. Thx Onno.

Gravatar
Onno Marsman
14 Nov '08 Permalink

q   This is just weird. Of course the extra (string+'') is not necessary. The following would do exactly the same:

1
string = (string+'').replace(/\r\n/g, "\n").replace(/\r/g, "\n");


or even something like (not tested):
1
string = (string+'').replace(/\r\n?/g, "\n");

Gravatar
Kevin van Zonneveld
13 Nov '08 Permalink

q  @ Yves Sucaet: I don't see the harm in that :) thank you Yves!

Gravatar
Yves Sucaet
12 Nov '08 Permalink

q   I think it makes sense to replace

1
string = (string+'').replace(/\r\n/g,"\n");


with

1
2
string = (string+'').replace(/\r\n/g,"\n");
string = (string+'').replace(/\r/g,"\n");

Gravatar
Kevin van Zonneveld
27 Aug '08 Permalink

q  @ sowberry: Thank you for your improvement!

Gravatar
sowberry
8 Aug '08 Permalink

q   While looking for a javascript crc script, I found the version on webtoolkit.info as well as your subsequent modification.

Testing with a chunk of text a couple hundred characters long, with just a couple non-ascii values, I saw no significant improvement with your approach of using an array as a pseudo-StringBuilder. The issue is the use of String.fromCharCode for even ascii values, which forces too many string creations. The code below is about 3 times faster in my tests:

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
function utf8_encode(string) {
    // Encodes a string to UTF-8 (version submitted by sowberry).
    //
    // string: the text to encode; it must already be a string, because
    //         .replace() is called on it directly.
    // Returns a string in which each character holds one UTF-8 byte value.
    // CRLF line endings are normalised to LF before encoding.
    string = string.replace(/\r\n/g, "\n");
    var utftext = "";
    // [start, end) is the run of ASCII characters not yet appended; whole
    // runs are copied at once instead of building a string per character.
    var start, end;
    start = end = 0;
    for (var n = 0; n < string.length; n++) {

        var c = string.charCodeAt(n);
        var enc = null;
        if (c < 128) {
            // ASCII: extend the pending run.
            end++;
        }
        else if ((c > 127) && (c < 2048)) {
            // Two-byte sequence.
            enc = String.fromCharCode((c >> 6) | 192) + String.fromCharCode((c & 63) | 128);
        }
        else {
            // Three-byte sequence.
            enc = String.fromCharCode((c >> 12) | 224) + String.fromCharCode(((c >> 6) & 63) | 128) + String.fromCharCode((c & 63) | 128);
        }
        if (enc !== null)
        {
            // Flush the pending ASCII run, then the encoded bytes.
            if (end > start)
            {
                utftext += string.substring(start, end);
            }
            utftext += enc;
            start = end = n+1;
        }
    }
    // Flush a trailing ASCII run.
    if (end > start)
    {
        utftext += string.substring(start, string.length);
    }
    return utftext;
}


Please feel free to post this to the various script repositories, as I am not especially active on the web. Thanks.


Contribute a New function

More functions

In this category

utf8_decode
» utf8_encode

Support us

spread the word:


Use any PHP function in JavaScript


These kind folks have already donated: Anonymous and Shawn Houser.
<your name here>

Click here to lend your support to: phpjs and make a donation at www.pledgie.com !