Use PHP functions in JavaScript

JavaScript get_html_translation_table

Returns the internal translation table used by htmlspecialchars and htmlentities

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
5051
52
53
54
5556
57
58
59
6061
62
63
64
6566
67
68
69
7071
72
73
74
7576
77
78
79
8081
82
83
84
8586
87
88
89
9091
92
93
94
9596
97
98
99
100101
102
103
104
105106
107
108
109
110111
112
113
114
115116
117
118
119
120121
122
123
124
125126
127
128
129
130131
132
133
134
135136
137
138
139
140141
142
143
144
145146
147
148
149
150151
152
153
154
155156
157
158
159
160161
function get_html_translation_table (table, quote_style) {
    // Returns the internal translation table used by htmlspecialchars and htmlentities
    //
    // version: 909.322
    // discuss at: http://phpjs.org/functions/get_html_translation_table
    // +   original by: Philip Peterson
    // +    revised by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   bugfixed by: noname
    // +   bugfixed by: Alex
    // +   bugfixed by: Marco
    // +   bugfixed by: madipta
    // +   improved by: KELAN
    // +   improved by: Brett Zamir (http://brett-zamir.me)
    // +   bugfixed by: Brett Zamir (http://brett-zamir.me)
    // +      input by: Frank Forte
    // +   bugfixed by: T.Wild
    // +      input by: Ratheous
    // %          note: It has been decided that we're not going to add global
    // %          note: dependencies to php.js, meaning the constants are not
    // %          note: real constants, but strings instead. Integers are also supported if someone
    // %          note: chooses to create the constants themselves.
    // %          note: The '&' mapping is always the FIRST key of the returned object, so that
    // %          note: code replacing one key after another in enumeration order does not
    // %          note: re-encode the ampersands introduced by earlier replacements.
    // *     example 1: get_html_translation_table('HTML_SPECIALCHARS');
    // *     returns 1: {'"': '&quot;', '&': '&amp;', '<': '&lt;', '>': '&gt;'}
    //
    // table:       'HTML_SPECIALCHARS' (default) or 'HTML_ENTITIES' in any letter case,
    //              or the integers 0 / 1. undefined and null select the default.
    // quote_style: 'ENT_NOQUOTES', 'ENT_COMPAT' (default) or 'ENT_QUOTES' in any letter
    //              case, or the integers 0 / 2 / 3.
    // returns:     a plain object mapping each character to its entity string.
    // throws:      Error when table does not resolve to one of the two supported tables.

    var constMappingTable = {}, constMappingQuoteStyle = {};
    var useTable = '', useQuoteStyle = '';
    var hash_map = {}, i = 0;

    // Entity names for the character codes 160..255, in code order:
    // latin1Names[i] is the entity name of character code 160 + i.
    var latin1Names = [
        'nbsp', 'iexcl', 'cent', 'pound', 'curren', 'yen', 'brvbar', 'sect',
        'uml', 'copy', 'ordf', 'laquo', 'not', 'shy', 'reg', 'macr',
        'deg', 'plusmn', 'sup2', 'sup3', 'acute', 'micro', 'para', 'middot',
        'cedil', 'sup1', 'ordm', 'raquo', 'frac14', 'frac12', 'frac34', 'iquest',
        'Agrave', 'Aacute', 'Acirc', 'Atilde', 'Auml', 'Aring', 'AElig', 'Ccedil',
        'Egrave', 'Eacute', 'Ecirc', 'Euml', 'Igrave', 'Iacute', 'Icirc', 'Iuml',
        'ETH', 'Ntilde', 'Ograve', 'Oacute', 'Ocirc', 'Otilde', 'Ouml', 'times',
        'Oslash', 'Ugrave', 'Uacute', 'Ucirc', 'Uuml', 'Yacute', 'THORN', 'szlig',
        'agrave', 'aacute', 'acirc', 'atilde', 'auml', 'aring', 'aelig', 'ccedil',
        'egrave', 'eacute', 'ecirc', 'euml', 'igrave', 'iacute', 'icirc', 'iuml',
        'eth', 'ntilde', 'ograve', 'oacute', 'ocirc', 'otilde', 'ouml', 'divide',
        'oslash', 'ugrave', 'uacute', 'ucirc', 'uuml', 'yacute', 'thorn', 'yuml'
    ];

    // Translate arguments
    constMappingTable[0]      = 'HTML_SPECIALCHARS';
    constMappingTable[1]      = 'HTML_ENTITIES';
    constMappingQuoteStyle[0] = 'ENT_NOQUOTES';
    constMappingQuoteStyle[2] = 'ENT_COMPAT';
    constMappingQuoteStyle[3] = 'ENT_QUOTES';

    if (table === undefined || table === null) {
        // isNaN(null) is false, so null must be caught before the numeric lookup;
        // otherwise it resolves to constMappingTable[null] (undefined) and throws.
        useTable = 'HTML_SPECIALCHARS';
    } else if (!isNaN(table)) {
        useTable = constMappingTable[table];
    } else {
        useTable = table ? table.toUpperCase() : 'HTML_SPECIALCHARS';
    }
    useQuoteStyle = !isNaN(quote_style) ? constMappingQuoteStyle[quote_style] : quote_style ? quote_style.toUpperCase() : 'ENT_COMPAT';

    if (useTable !== 'HTML_SPECIALCHARS' && useTable !== 'HTML_ENTITIES') {
        throw new Error("Table: "+useTable+' not supported');
    }

    // The map is filled directly, keyed by the characters themselves. Going through an
    // intermediate object keyed by decimal codes ('34', '38', ...) is not safe: integer-like
    // keys are enumerated in ascending numeric order, which puts '"' (34) ahead of '&' (38).
    hash_map['&'] = '&amp;';
    if (useQuoteStyle !== 'ENT_NOQUOTES') {
        hash_map['"'] = '&quot;';
    }
    if (useQuoteStyle === 'ENT_QUOTES') {
        hash_map["'"] = '&#39;';
    }
    hash_map['<'] = '&lt;';
    hash_map['>'] = '&gt;';

    if (useTable === 'HTML_ENTITIES') {
        for (i = 0; i < latin1Names.length; i++) {
            hash_map[String.fromCharCode(160 + i)] = '&' + latin1Names[i] + ';';
        }
    }

    return hash_map;
}
external links: original PHP docs | raw js source

Examples

Running

1
get_html_translation_table('HTML_SPECIALCHARS');

Should return

1
{'"': '&quot;', '&': '&amp;', '<': '&lt;', '>': '&gt;'}

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have get_html_translation_table goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here you are allowing us to use it in php.js hence dual licensing it under the MIT and GPL licenses

Gravatar
Kevin van Zonneveld
14 Dec '09 Permalink

q  @ Nick Kolosov: Thanks for reporting.

@ Fox: Thanks for fixing : )

Will be online shortly folks.

Gravatar
Fox
2 Dec '09 Permalink

q   Sure it's not a good solution, but to answer to Nick Kolosov, and as I had the same problem with using :
- htmlspecialchars => encode
- html_entity_decode => decode
And my aim was to not add parameters in functions (keep functions like php). I decided to add a small code in decode functions in order to fix the problem.
in html_entity_decode function before :

1
for (symbol in hash_map) {

add the following lines :
1
2
3
4
// BOF : fix &amp; problem
  delete(hash_map['&']);
  hash_map['&'] = '&amp;';
  // EOF : fix &amp; problem

Gravatar
Nick Kolosov
17 Nov '09 Permalink

q   Oops, the blog ate the HTML tags. Error example:

1
html_entity_decode('&amp;nbsp;') = ' ' instead of '&nbsp'

Gravatar
Brett Zamir
20 Oct '09 Permalink

q  @Roger: Yes, things can be that easy, if that's what you are trying to do. However, your function creating numeric character references has no relation to substituting for get_html_translation_table() for those who need it (nor for htmlentities() or htmlspecialchars() which depend on it).

Gravatar
Roger
20 Oct '09 Permalink

q   Things can be so easy:

1
2
3
4
56
7
8
9
10
function toHTMLEntity(str) {
        // Returns str with every character whose code is above 127 replaced by a
        // decimal numeric character reference ("&#NNN;"); characters with a code of
        // 127 or below are copied through unchanged.
        var s = str.split("");
        var ret = "";
        var i, c, low; // declared locally: an undeclared loop index leaks into the global scope
        for (i = 0; i < s.length; i++) {
                c = s[i].charCodeAt(0);
                // A high surrogate followed by a low surrogate encodes ONE character above
                // U+FFFF; merge the pair so a single reference with the real code point is
                // emitted instead of two references to surrogate halves.
                if (c >= 0xD800 && c <= 0xDBFF && i + 1 < s.length) {
                        low = s[i + 1].charCodeAt(0);
                        if (low >= 0xDC00 && low <= 0xDFFF) {
                                c = (c - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000;
                                i++; // the low surrogate has been consumed
                        }
                }
                if (c > 127) {
                        ret += ("&#" + c + ";");
                } else {
                        ret += s[i];
                }
        }
        return ret;
}

Gravatar
Kevin van Zonneveld
8 Jul '09 Permalink

q  Great stuff!

Gravatar
Brett Zamir
5 Jul '09 Permalink

q  It was fixed recently in subversion (SVN). It just needed some time to be made available.

Yes, it's true that ECMAScript doesn't guarantee the order of execution within objects, but I understand that all major browsers maintain the order (and PHP.JS in general depends on this, being as we rely on objects for associative array-like behavior).

Good point about "histogram". Maybe someone copied it from count_chars() which looks like that one used the word correctly. Anyways, I fixed it for the other functions (entity ones) where it was indeed not correct.

Gravatar
Ratheous
4 Jul '09 Permalink

q   Because the ampersand is used in all entities, and htmlspecialchars etc. washes the string through the split and join repeatedly, entities['38'] should be the first item in the array and the first character replaced. Thus any ampersand already in the string will be correctly replaced but those introduced by the replacement of other characters will remain intact.

Someone may have commented on this previously; it's hard to tell because the comments are a bit hard to follow, but regardless, it hasn't been fixed.

Moving it up to line 40 solves the problem in my code, but if I remember correctly the use of for...in doesn't guarantee iterators in a particular order so it might be better to take it out of the entities table and replace it separately (though in my experience they come out in the order they were assigned).

P.S. Just as an observation, you use 'histogram' as a variable name in a number of functions for what is actually a hash table...

his·to·gram /ˈhɪs təˌgræm/

–noun Statistics.
a graph of a frequency distribution in which rectangles with bases on the horizontal axis are given widths equal to the class intervals and heights equal to the corresponding frequencies.

Gravatar
Brett Zamir
30 Apr '09 Permalink

q  Thanks... Done in SVN. I also simplified the if/else following

Gravatar
KELAN
30 Apr '09 Permalink

q   useQuoteStyle = !isNaN(quote_style) ? constMappingQuoteStyle[quote_style] : quote_style ? quote_style.toUpperCase() : 'ENT_COMPAT';

Gravatar
KELAN
30 Apr '09 Permalink

q  

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
useTable      = (table ? table.toUpperCase() : 'HTML_SPECIALCHARS');
    useQuoteStyle = (quote_style ? quote_style.toUpperCase() : 'ENT_COMPAT');
    
    // Translate arguments
    constMappingTable[0]      = 'HTML_SPECIALCHARS';    constMappingTable[1]      = 'HTML_ENTITIES';
    constMappingQuoteStyle[0] = 'ENT_NOQUOTES';
    constMappingQuoteStyle[2] = 'ENT_COMPAT';
    constMappingQuoteStyle[3] = 'ENT_QUOTES';
        // Map numbers to strings for compatibilty with PHP constants
    if (!isNaN(useTable)) {
        useTable = constMappingTable[useTable];
    }
    if (!isNaN(useQuoteStyle)) {        useQuoteStyle = constMappingQuoteStyle[useQuoteStyle];
    }


==> get_html_translation_table(0,2);

1
2
3
4
56
7
constMappingTable[0]      = 'HTML_SPECIALCHARS';
    constMappingTable[1]      = 'HTML_ENTITIES';
    constMappingQuoteStyle[0] = 'ENT_NOQUOTES';
    constMappingQuoteStyle[2] = 'ENT_COMPAT';
    constMappingQuoteStyle[3] = 'ENT_QUOTES';        useTable          = !isNaN(table) ? constMappingTable[table] : table ? table.toUpperCase() : 'HTML_SPECIALCHARS';
        useQuoteStyle = !isNaN(quote_style) ? constMappingQuoteStyle[table] : quote_style ? quote_style.toUpperCase() : 'ENT_COMPAT';

Gravatar
Kevin van Zonneveld
2 Mar '09 Permalink

q  @ madipta: Thank you: fixed.

Gravatar
madipta
26 Feb '09 Permalink

q   i think you need to move entities['38'] on top

1
2
3
4
56
7
8
9
entities['38'] = '&amp;';
 
    if (useQuoteStyle != 'ENT_NOQUOTES') {
        entities['34'] = '&quot;';
    } 
    if (useQuoteStyle == 'ENT_QUOTES') {
        entities['39'] = '&#39;';
    }

Gravatar
Kevin van Zonneveld
25 Feb '09 Permalink

q  @ Marco: Excellent suggestion, thanks!

Gravatar
Marco
23 Feb '09 Permalink

q   I suggest that you add a ; after the following code so that the script can be packed to one line (for example with: http://dean.edwards.name/packer/)

1
symbol = String.fromCharCode(decimal)

Gravatar
Kevin van Zonneveld
21 Feb '09 Permalink

q  @ Alex: Ok I've changed it in SVN, can you tell me why is that?

Gravatar
Alex
20 Feb '09 Permalink

q   need replace:
entities['39'] = '&#039;';
to
entities['39'] = '&#39;';

Gravatar
Kevin van Zonneveld
1 Dec '08 Permalink

q  @ GreLI: It was easier developing (read: copy & pasting ;) that way. We might want to switch back to reduce its size though, that's a good point, thanks.

Gravatar
GreLI
30 Nov '08 Permalink

q   Instead of this:

1
2
3
entities['38'] = '&amp;';
entities['60'] = '&lt;';
entities['62'] = '&gt;';

You can write
1
2
3
4
5
entities = {
'38': '&amp;',
'60': '&lt;',
'62': '&gt;'
}

to reduce size and increase readability.

Gravatar
Kevin van Zonneveld
3 Nov '08 Permalink

q  @ noname: I've committed your fix noname, thanks!

Gravatar
noname
25 Oct '08 Permalink

q   You need to change position for some lines.
From:

1
2
3
entities['60'] = '&lt;';
    entities['62'] = '&gt;';
    entities['38'] = '&amp;';


To:
1
2
3
entities['38'] = '&amp;';
    entities['60'] = '&lt;';
    entities['62'] = '&gt;';


Because it will be encode wrong. Example:
<a> => &amp;lt;a&amp;gt;


Contribute a New function