Use PHP functions in JavaScript

JavaScript htmlspecialchars_decode

Convert special HTML entities back to characters

1
2
3
4
56
7
8
9
1011
12
13
14
1516
17
18
19
2021
22
23
24
2526
27
28
29
3031
32
33
34
3536
37
38
39
4041
42
43
44
4546
47
48
49
5051
52
53
54
5556
57
58
59
6061
62
63
64
function htmlspecialchars_decode (string, quote_style) {
    // Convert special HTML entities back to characters
    //
    // version: 1008.1718
    // discuss at: http://phpjs.org/functions/htmlspecialchars_decode
    // +   original by: Mirek Slugen
    // +   improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   bugfixed by: Mateusz "loonquawl" Zalega
    // +      input by: ReverseSyntax
    // +      input by: Slawomir Kaniecki
    // +      input by: Scott Cariss
    // +      input by: Francois
    // +   bugfixed by: Onno Marsman
    // +    revised by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
    // +   bugfixed by: Brett Zamir (http://brett-zamir.me)
    // +      input by: Ratheous
    // +      input by: Mailfaker (http://www.weedem.fr/)
    // +      reimplemented by: Brett Zamir (http://brett-zamir.me)
    // +    bugfixed by: Brett Zamir (http://brett-zamir.me)
    // *     example 1: htmlspecialchars_decode("<p>this -&gt; &quot;</p>", 'ENT_NOQUOTES');
    // *     returns 1: '<p>this -> &quot;</p>'
    // *     example 2: htmlspecialchars_decode("&amp;quot;");
    // *     returns 2: '&quot;'
    //
    // Parameters:
    //   string      - value to decode; it is converted with toString() first.
    //   quote_style - optional. Either a numeric bitmask, a flag name from OPTS
    //                 below, or an array of flag names. Defaults to 2 (ENT_COMPAT).
    // Returns: the decoded string.
    //   &lt; and &gt; and &amp; are always decoded.
    //   &quot; is decoded unless quote_style is 0 or the ENT_NOQUOTES flag is given.
    //   &#039; is decoded only when the ENT_HTML_QUOTE_SINGLE bit (1) is set.
    var optTemp = 0, i = 0, noquotes = false;
    if (typeof quote_style === 'undefined') {
        quote_style = 2;
    }
    string = string.toString().replace(/&lt;/g, '<').replace(/&gt;/g, '>');
    var OPTS = {
        'ENT_NOQUOTES': 0,
        'ENT_HTML_QUOTE_SINGLE' : 1,
        'ENT_HTML_QUOTE_DOUBLE' : 2,
        'ENT_COMPAT': 2,
        'ENT_QUOTES': 3,
        'ENT_IGNORE' : 4
    };
    if (quote_style === 0) {
        noquotes = true;
    }
    if (typeof quote_style !== 'number') { // Allow for a single string or an array of string flags
        // Normalise to an array so a lone flag name is handled like a one-element list
        quote_style = [].concat(quote_style);
        for (i = 0; i < quote_style.length; i++) {
            // Resolve string input to bitwise e.g. 'ENT_QUOTES' becomes 3
            if (OPTS[quote_style[i]] === 0) {
                // ENT_NOQUOTES has value 0, so it cannot be detected through the bitmask
                noquotes = true;
            }
            else if (OPTS[quote_style[i]]) {
                optTemp = optTemp | OPTS[quote_style[i]];
            }
        }
        quote_style = optTemp;
    }
    if (quote_style & OPTS.ENT_HTML_QUOTE_SINGLE) {
        string = string.replace(/&#0*39;/g, "'"); // PHP doesn't currently escape if more than one 0, but it should
        // &apos; and &#x27; are intentionally left alone: decoding them would be useful, but is not a part of PHP
    }
    if (!noquotes) {
        string = string.replace(/&quot;/g, '"');
    }
    // Put this in last place to avoid escape being double-decoded
    string = string.replace(/&amp;/g, '&');

    return string;
}
external links: original PHP docs | raw js source

Examples

» Example 1

Running

1
htmlspecialchars_decode("<p>this -&gt; &quot;</p>", 'ENT_NOQUOTES');

Should return

1
'<p>this -> &quot;</p>'

» Example 2

Running

1
htmlspecialchars_decode("&amp;quot;");

Should return

1
'&quot;'

Dependencies

No dependencies, you can use this function standalone.

Open syntax issues

php.js uses JsLint to help us keep our code consistent and prevent some common bugs.

Eventually we want all code to pass or at least take into consideration most fixes suggested by JsLint, following this JsLint configuration we’ve decided on.


Authors

Thanks to the following developers, you get to have htmlspecialchars_decode goodness in JavaScript.

Comments

Add Comment
Use:
[CODE]
your_stuff('here');
[/CODE]
for proper code formatting
By submitting code here you are allowing us to use it in php.js hence dual licensing it under the MIT and GPL licenses

Gravatar
Robert Sidlauskas
Jul 10th Permalink

q  Hi its good.

<a href='http://filesharepoit.com'>Filesharepoint.com</a>

Gravatar
Jerry
Apr 27th Permalink

q  Very nice - I think I will use your modification as its much tidier.
Don't forget the 'g' attribute on the last pattern.

Gravatar
Rafał Kukawski
Apr 27th Permalink

q  Sorry for the double comment, but now the code should be more readable

// Decode the HTML entities produced by htmlspecialchars in a single pass.
//   input       - value to decode; coerced to a string.
//   quote_style - 'ENT_QUOTES' also decodes &#039;, 'ENT_NOQUOTES' leaves &quot;
//                 encoded; any other value decodes &amp; &lt; &gt; and &quot;.
function htmlspecialchars_decode(input, quote_style) {
	var c = {
		'&amp;': '&',
		'&lt;': '<',
		'&gt;': '>',
		'&quot;': '"',
		'&#039;': '\''
	};
	return ('' + input).replace(
		quote_style === 'ENT_QUOTES' ? /&amp;|&lt;|&gt;|&quot;|&#039;/g :
		quote_style === 'ENT_NOQUOTES' ? /&amp;|&lt;|&gt;/g :
		// The default pattern needs the 'g' flag too, otherwise only the first entity is decoded
		/&amp;|&lt;|&gt;|&quot;/g,
		function (a) {
			// Each match is one of the keys of c, so the lookup always succeeds
			return c[a];
		}
	);
}

Gravatar
Rafał Kukawski
Apr 27th Permalink

q  @Jerry: very short and clean solution. I just managed to replace the 3 .replace calls with just one, by choosing the regexp with a conditional expression.

// Single-pass decoder: the regexp is picked from quote_style and every match is
// looked up in the entity table c. The input is coerced to a string first.
function htmlspecialchars_decode(input, quote_style){
   var c = {
      '&amp;': '&',
      '&lt;': '<',
      '&gt;': '>',
      '&quot;': '"',
      '&#039;': '\''
   };
   // All three patterns carry the 'g' flag; without it on the default pattern
   // only the first entity in the input would be decoded.
   return ('' + input).replace(quote_style === 'ENT_QUOTES' ? /&amp;|&lt;|&gt;|&quot;|&#039;/g : quote_style === 'ENT_NOQUOTES' ? /&amp;|&lt;|&gt;/g : /&amp;|&lt;|&gt;|&quot;/g, function(a){return c[a]; });
}



The performance of both solutions should be comparable.
And I added casting of the input to string.

Gravatar
Jerry
Apr 27th Permalink

q  Here is my simple implementation of htmlspecialchars_decode.
I use just one replace and I have not come across a situation where an html entity is double-decoded. Comments are welcome

// Anonymous single-pass decoder: a is the string to decode (a.replace is called
// on it directly), b is compared against the flag names 'ENT_QUOTES' and 'ENT_NOQUOTES'.
// NOTE(review): this is an unnamed function expression; presumably it is meant to be
// assigned to a name before use.
function(a,b){
// Entity -> character lookup table used by all three branches
var c={
     '&amp;':'&',
     '&lt;':'<',
     '&gt;':'>',
     '&quot;':'"',
     '&#039;':'\''
};
     // ENT_QUOTES: decode both double and single quotes
     // (the callback parameter a shadows the outer a and holds the matched entity)
     if(b==='ENT_QUOTES'){
     return a.replace(/&amp;|&lt;|&gt;|&quot;|&#039;/g,function(a){return c[a];});
     }
     // ENT_NOQUOTES: leave both kinds of quotes encoded
     else if(b==='ENT_NOQUOTES'){
     return a.replace(/&amp;|&lt;|&gt;/g,function(a){return c[a];});
     }
     // Any other value: decode double quotes only
     else{
     return a.replace(/&amp;|&lt;|&gt;|&quot;/g,function(a){return c[a];});
     }
}

Gravatar
Brett Zamir
Feb 13th Permalink

q  See my comment under htmlspecialchars()

Gravatar
hacksmw
Feb 11th Permalink

q  htmlspecialchars_decode function in PHP doesn't work recursive.
but this function is too recursive.
so "&amp;#9787;" will not be converted by this function as "&#9787;"
however, it will be converted as "☻"
on the other hand,
the function in php will convert it as "&#9787;"

Gravatar
Brett Zamir
25 Nov '09 Permalink

q  @Mailfaker: Thanks. I've completely redone the two htmlspecialchars functions in Git, also to handle flags and arguments: http://github.com/kvz/phpjs/commit/881de8748cf986d025ecfad5f448fbbb8ba7710e . Btw, using replace was much faster for me (and easier) than using split and join.

Gravatar
Mailfaker
25 Nov '09 Permalink

q  Hi everyone,
this code wasn't working for me. I have done some changes and now it runs.
The problem is that, for decoding, hash_map table must be read in descending order. Or simply, you can do so:

// Decode &quot;, &lt;, &gt; and &amp; using split/join.
//   string - value to decode; converted with toString() first.
// Returns the decoded string.
function htmlspecialchars_decode (string) {
    // Declared locally: an undeclared assignment would leak a global and
    // throws a ReferenceError in strict mode / ES modules.
    var tmp_str = string.toString();
    tmp_str = tmp_str.split('&quot;').join('"');
    tmp_str = tmp_str.split('&lt;').join('<');
    tmp_str = tmp_str.split('&gt;').join('>');
    // &amp; goes last so that e.g. "&amp;lt;" becomes "&lt;" and is not decoded twice
    tmp_str = tmp_str.split('&amp;').join('&');
    return tmp_str;
}

Gravatar
Kevin van Zonneveld
16 Aug '09 Permalink

q  @ Liviu Mirea: I added your example as a testcase, but I was unable to reproduce the problem.

What version & browser are you using?

Gravatar
Liviu Mirea
10 Aug '09 Permalink

q  I'm sorry but the messaging system seems to be messed up and I can't post my message. What I'm trying to say is that the above function is incorrect. If you try to decode "& amp; quot;" (remove spaces) it will output a double quotation mark instead of "& quot;" (remove spaces). Hope this message will be properly posted. :/

Gravatar
Liviu Mirea
10 Aug '09 Permalink

q  Erm, ignore my message below, the characters are messed up.
Here:

htmlspecialchars_decode(' " '); 


In PHP it returns:


"



The Javascript function above returns: "
Basically, it first decodes

"&"

to

"&"

, thus resulting

"""

. It further decodes the string to a double quotation mark when it shouldn't.

Gravatar
Liviu Mirea
10 Aug '09 Permalink

q  

htmlspecialchars_decode(' &amp;quot; '); 


In PHP it returns: &quot;

The Javascript function above returns: "
Basically, it first decodes "&amp;" to "&", thus resulting in "&quot;". Afterward, it decodes "&quot;" but it shouldn't.

Gravatar
Kevin van Zonneveld
29 Sep '08 Permalink

q  @ ReverseSyntax & Onno Marsman: Wow that was ugly. Sorry everyone. Fixed.

Gravatar
Onno Marsman
25 Sep '08 Permalink

q  There is a serious parse error in this function

[CODE=&quot;Javascript&quot;]
string = string.replace(/&amp;gt;/g '&gt;');
[/CODE]

should be (added a comma):

[CODE=&quot;Javascript&quot;]
string = string.replace(/&amp;gt;/g, '&gt;');
[/CODE]

Gravatar
ReverseSyntax
25 Sep '08 Permalink

q  There is an error in the htmlspecialchars_decode(),
There a single quote around the regex for all params values in replace() except for &gt; the only one that works. this is in the php.min.js

Gravatar
Kevin van Zonneveld
21 Sep '08 Permalink

q  [CODE="php"]
<?php
echo html_entity_decode("&#56;")."\n";
?>
[/CODE]
returns 8.

This behavior is not documented in the PHP manual though, do you know what table is used here?

Gravatar
Trevor
17 Sep '08 Permalink

q  Issue: Doesn't decode all html escaped characters, such as &#56;

Gravatar
Kevin van Zonneveld
27 Jul '08 Permalink

q  @ Bob Palin: Thank you for noticing. It is possible to declare global constants in javascript, but that would increase the number of dependencies throughout this project.

We have deliberately chosen to implement this a bit different from the original PHP documentation to allow for more functions to be included separately.

Gravatar
Bob Palin
27 Jul '08 Permalink

q  The function description says that 'quote_style' is an int and list constants, in fact the argument is a string as shown in the code and example.

Gravatar
Mateusz "loonquawl" Zalega (http://loonquawl.yoyo.
31 May '08 Permalink

q  No problem :)

There's another bug in this function. First argument of called function string.replace() is a string object '/&amp;amp;/g'. It won't work, unless it's a regular expression object (should be /&amp;amp;/g - without the apostrophes).
Here's the correct code:
[CODE=&quot;Javascript&quot;]
string = string.toString();

// Always encode
string = string.replace(/&amp;amp;/g, '&amp;');
string = string.replace(/&amp;lt;/g, '&lt;');
string = string.replace(/&amp;gt;/g, '&gt;');

// Encode depending on quote_style
if (quote_style == 'ENT_QUOTES') {
string = string.replace(/&amp;quot;/g, '&quot;');
string = string.replace(/&amp;#039;/g, '\'');
} else if (quote_style != 'ENT_NOQUOTES') {
// All other cases (ENT_COMPAT, default, but not ENT_NOQUOTES)
string = string.replace(/&amp;quot;/g, '&quot;');
}

return string;
[/CODE]

This is explained here:
http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Reference:Global_Objects:String:replace
http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Reference:Objects:RegExp

Btw. Most people involved in php2js project have their full names in credits. So, my name's Mateusz Zalega. Just saying :)

Gravatar
Kevin van Zonneveld
31 May '08 Permalink

q  @ loonquawl: Guess it should ;) thank you!

Gravatar
loonquawl
31 May '08 Permalink

q  Shouldn't it be
[CODE=&quot;Javascript&quot;]
string = string.replace(/&amp;/g, '&amp;');
string = string.replace(/&lt;/g, '&lt;');
string = string.replace(/&gt;/g, '&gt;');
[/CODE]

rather than

[CODE = &quot;Javascript&quot;]
string.replace('/&amp;/g', '&amp;');
string.replace('/&lt;/g', '&lt;');
string.replace(/&gt;/g, '&gt;')
[/CODE]
?

Function (string object).replace() doesn't modify the string. It returns a new (replaced) string object.


Contribute a New function