[javascript] HTML Entity Decode

How do I encode and decode HTML entities using JavaScript or JQuery?

var varTitle = "Chris' corner";

I want it to be:

var varTitle = "Chris' corner";

This question is related to javascript jquery html

The answer is


Inspired by Robert K's solution, this version does not strip HTML tags, and is just as secure.

var decode_entities = (function() {
    // Remove HTML Entities
    var element = document.createElement('div');

    function decode_HTML_entities (str) {

        if(str && typeof str === 'string') {

            // Escape HTML before decoding for HTML Entities
            str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';');

            element.innerHTML = str;
            if(element.innerText){
                str = element.innerText;
                element.innerText = '';
            }else{
                // Firefox support
                str = element.textContent;
                element.textContent = '';
            }
        }
        return unescape(str);
    }
    return decode_HTML_entities;
})();

jQuery provides a way to encode and decode html entities.

If you use a "<div/>" tag, it will strip out all the html.

function htmlDecode(value) {
    return $("<div/>").html(value).text();
}

function htmlEncode(value) {
    return $('<div/>').text(value).html();
}

If you use a "<textarea/>" tag, it will preserve the html tags.

function htmlDecode(value) {
    return $("<textarea/>").html(value).text();
}

function htmlEncode(value) {
    return $('<textarea/>').text(value).html();
}

To add yet another "inspired by Robert K" to the list, here is another safe version which does not strip HTML tags. Instead of running the whole string through the HTML parser, it pulls out only the entities and converts those.

var decodeEntities = (function() {
    // this prevents any overhead from creating the object each time
    var element = document.createElement('div');

    // regular expression matching HTML entities
    var entity = /&(?:#x[a-f0-9]+|#[0-9]+|[a-z0-9]+);?/ig;

    return function decodeHTMLEntities(str) {
        // find and replace all the html entities
        str = str.replace(entity, function(m) {
            element.innerHTML = m;
            return element.textContent;
        });

        // reset the value
        element.textContent = '';

        return str;
    }
})();

Because @Robert K and @mattcasey both have good code, I thought I'd contribute here with a CoffeeScript version, in case anyone in the future could use it:

    String::unescape = (strict = false) ->
      ###
      # Take escaped text, and return the unescaped version
      #
      # @param string str | String to be used
      # @param bool strict | Stict mode will remove all HTML
      #
      # Test it here:
      # https://jsfiddle.net/tigerhawkvok/t9pn1dn5/
      #
      # Code: https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d
      ###
      # Create a dummy element
      element = document.createElement("div")
      decodeHTMLEntities = (str) ->
        if str? and typeof str is "string"
          unless strict is true
            # escape HTML tags
            str = escape(str).replace(/%26/g,'&').replace(/%23/g,'#').replace(/%3B/g,';')
          else
            str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '')
            str = str.replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '')
          element.innerHTML = str
          if element.innerText
            # Do we support innerText?
            str = element.innerText
            element.innerText = ""
          else
            # Firefox
            str = element.textContent
            element.textContent = ""
        unescape(str)
      # Remove encoded or double-encoded tags
      fixHtmlEncodings = (string) ->
        string = string.replace(/\&amp;#/mg, '&#') # The rest, for double-encodings
        string = string.replace(/\&quot;/mg, '"')
        string = string.replace(/\&quote;/mg, '"')
        string = string.replace(/\&#95;/mg, '_')
        string = string.replace(/\&#39;/mg, "'")
        string = string.replace(/\&#34;/mg, '"')
        string = string.replace(/\&#62;/mg, '>')
        string = string.replace(/\&#60;/mg, '<')
        string
      # Run it
      tmp = fixHtmlEncodings(this)
      decodeHTMLEntities(tmp)

See https://jsfiddle.net/tigerhawkvok/t9pn1dn5/7/ or https://gist.github.com/tigerhawkvok/285b8631ed6ebef4446d (includes compiled JS, and is probably updated compared to this answer)


Here is a full version

function htmldecode(s){
    window.HTML_ESC_MAP = {
    "nbsp":" ","iexcl":"¡","cent":"¢","pound":"£","curren":"¤","yen":"¥","brvbar":"¦","sect":"§","uml":"¨","copy":"©","ordf":"ª","laquo":"«","not":"¬","reg":"®","macr":"¯","deg":"°","plusmn":"±","sup2":"²","sup3":"³","acute":"´","micro":"µ","para":"¶","middot":"·","cedil":"¸","sup1":"¹","ordm":"º","raquo":"»","frac14":"¼","frac12":"½","frac34":"¾","iquest":"¿","Agrave":"À","Aacute":"Á","Acirc":"Â","Atilde":"Ã","Auml":"Ä","Aring":"Å","AElig":"Æ","Ccedil":"Ç","Egrave":"È","Eacute":"É","Ecirc":"Ê","Euml":"Ë","Igrave":"Ì","Iacute":"Í","Icirc":"Î","Iuml":"Ï","ETH":"Ð","Ntilde":"Ñ","Ograve":"Ò","Oacute":"Ó","Ocirc":"Ô","Otilde":"Õ","Ouml":"Ö","times":"×","Oslash":"Ø","Ugrave":"Ù","Uacute":"Ú","Ucirc":"Û","Uuml":"Ü","Yacute":"Ý","THORN":"Þ","szlig":"ß","agrave":"à","aacute":"á","acirc":"â","atilde":"ã","auml":"ä","aring":"å","aelig":"æ","ccedil":"ç","egrave":"è","eacute":"é","ecirc":"ê","euml":"ë","igrave":"ì","iacute":"í","icirc":"î","iuml":"ï","eth":"ð","ntilde":"ñ","ograve":"ò","oacute":"ó","ocirc":"ô","otilde":"õ","ouml":"ö","divide":"÷","oslash":"ø","ugrave":"ù","uacute":"ú","ucirc":"û","uuml":"ü","yacute":"ý","thorn":"þ","yuml":"ÿ","fnof":"ƒ","Alpha":"?","Beta":"?","Gamma":"G","Delta":"?","Epsilon":"?","Zeta":"?","Eta":"?","Theta":"T","Iota":"?","Kappa":"?","Lambda":"?","Mu":"?","Nu":"?","Xi":"?","Omicron":"?","Pi":"?","Rho":"?","Sigma":"S","Tau":"?","Upsilon":"?","Phi":"F","Chi":"?","Psi":"?","Omega":"O","alpha":"a","beta":"ß","gamma":"?","delta":"d","epsilon":"e","zeta":"?","eta":"?","theta":"?","iota":"?","kappa":"?","lambda":"?","mu":"µ","nu":"?","xi":"?","omicron":"?","pi":"p","rho":"?","sigmaf":"?","sigma":"s","tau":"t","upsilon":"?","phi":"f","chi":"?","psi":"?","omega":"?","thetasym":"?","upsih":"?","piv":"?","bull":"•","hellip":"…","prime":"'","Prime":""","oline":"?","frasl":"/","weierp":"P","image":"I","real":"R","trade":"™","alefsym":"?","larr":"?","uarr":"?","rarr":"?","darr":"?","harr":"?","crarr":"?","lArr":"?","uArr":"?","rArr":"?","dArr":"?","hArr":"?","forall":"?","part":"?","exist":"?","empty":"Ø","nabla":"?","isin":"?","notin":"?","ni":"?","prod":"?","sum":"?","minus":"-","lowast":"*","radic":"v","prop":"?","infin":"8","ang":"?","and":"?","or":"?","cap":"n","cup":"?","int":"?","there4":"?","sim":"~","cong":"?","asymp":"˜","ne":"?","equiv":"=","le":"=","ge":"=","sub":"?","sup":"?","nsub":"?","sube":"?","supe":"?","oplus":"?","otimes":"?","perp":"?","sdot":"·","lceil":"?","rceil":"?","lfloor":"?","rfloor":"?","lang":"<","rang":">","loz":"?","spades":"?","clubs":"?","hearts":"?","diams":"?","\"":"quot","amp":"&","lt":"<","gt":">","OElig":"Œ","oelig":"œ","Scaron":"Š","scaron":"š","Yuml":"Ÿ","circ":"ˆ","tilde":"˜","ndash":"–","mdash":"—","lsquo":"‘","rsquo":"’","sbquo":"‚","ldquo":"“","rdquo":"”","bdquo":"„","dagger":"†","Dagger":"‡","permil":"‰","lsaquo":"‹","rsaquo":"›","euro":"€"};
    if(!window.HTML_ESC_MAP_EXP)
        window.HTML_ESC_MAP_EXP = new RegExp("&("+Object.keys(HTML_ESC_MAP).join("|")+");","g");
    return s?s.replace(window.HTML_ESC_MAP_EXP,function(x){
        return HTML_ESC_MAP[x.substring(1,x.length-1)]||x;
    }):s;
}

Usage

htmldecode("&sum;&nbsp;&gt;&euro;");

Original author answer here.

This is my favourite way of decoding HTML characters. The advantage of using this code is that tags are also preserved.

function decodeHtml(html) {
    var txt = document.createElement("textarea");
    txt.innerHTML = html;
    return txt.value;
}

Example: http://jsfiddle.net/k65s3/

Input:

Entity:&nbsp;Bad attempt at XSS:<script>alert('new\nline?')</script><br>

Output:

Entity: Bad attempt at XSS:<script>alert('new\nline?')</script><br>

I think that is the exact opposite of the solution chosen.

var decoded = $("<div/>").text(encodedStr).html();

Try it :)


Inspired by Robert K's solution, strips html tags and prevents executing scripts and eventhandlers like: <img src=fake onerror="prompt(1)"> Tested on latest Chrome, FF, IE (should work from IE9, but haven't tested).

var decodeEntities = (function () {
        //create a new html document (doesn't execute script tags in child elements)
        var doc = document.implementation.createHTMLDocument("");
        var element = doc.createElement('div');

        function getText(str) {
            element.innerHTML = str;
            str = element.textContent;
            element.textContent = '';
            return str;
        }

        function decodeHTMLEntities(str) {
            if (str && typeof str === 'string') {
                var x = getText(str);
                while (str !== x) {
                    str = x;
                    x = getText(x);
                }
                return x;
            }
        }
        return decodeHTMLEntities;
    })();

Simply call:

decodeEntities('<img src=fake onerror="prompt(1)">');
decodeEntities("<script>alert('aaa!')</script>");

A more functional approach to @William Lahti's answer:

var entities = {
  'amp': '&',
  'apos': '\'',
  '#x27': '\'',
  '#x2F': '/',
  '#39': '\'',
  '#47': '/',
  'lt': '<',
  'gt': '>',
  'nbsp': ' ',
  'quot': '"'
}

function decodeHTMLEntities (text) {
  return text.replace(/&([^;]+);/gm, function (match, entity) {
    return entities[entity] || match
  })
}

I know I'm a bit late to the game, but I thought I might provide the following snippet as an example of how I decode HTML entities using jQuery:

var varTitleE = "Chris&apos; corner";
var varTitleD = $("<div/>").html(varTitleE).text();

console.log(varTitleE + " vs. " + varTitleD);???????????

Don't forget to fire-up your inspector/firebug to see the console results -- or simply replace console.log(...) w/alert(...)

That said, here's what my console via the Google Chrome inspector read:

Chris&apos; corner vs. Chris' corner

To do it in pure javascript without jquery or predefining everything you can cycle the encoded html string through an elements innerHTML and innerText(/textContent) properties for every decode step that is required:

<html>
  <head>
    <title>For every decode step, cycle through innerHTML and innerText </title>
    <script>
function decode(str) {
  var d = document.createElement("div");
  d.innerHTML = str; 
  return typeof d.innerText !== 'undefined' ? d.innerText : d.textContent;
}
    </script>
  </head>
  <body>
    <script>
var encodedString = "&lt;p&gt;name&lt;/p&gt;&lt;p&gt;&lt;span style=\"font-size:xx-small;\"&gt;ajde&lt;/span&gt;&lt;/p&gt;&lt;p&gt;&lt;em&gt;da&lt;/em&gt;&lt;/p&gt;";
    </script>
    <input type=button onclick="document.body.innerHTML=decode(encodedString)"/>
  </body>
</html>

I recommend against using the jQuery code that was accepted as the answer. While it does not insert the string to decode into the page, it does cause things such as scripts and HTML elements to get created. This is way more code than we need. Instead, I suggest using a safer, more optimized function.

var decodeEntities = (function() {
  // this prevents any overhead from creating the object each time
  var element = document.createElement('div');

  function decodeHTMLEntities (str) {
    if(str && typeof str === 'string') {
      // strip script/html tags
      str = str.replace(/<script[^>]*>([\S\s]*?)<\/script>/gmi, '');
      str = str.replace(/<\/?\w(?:[^"'>]|"[^"]*"|'[^']*')*>/gmi, '');
      element.innerHTML = str;
      str = element.textContent;
      element.textContent = '';
    }

    return str;
  }

  return decodeHTMLEntities;
})();

http://jsfiddle.net/LYteC/4/

To use this function, just call decodeEntities("&amp;") and it will use the same underlying techniques as the jQuery version will—but without jQuery's overhead, and after sanitizing the HTML tags in the input. See Mike Samuel's comment on the accepted answer for how to filter out HTML tags.

This function can be easily used as a jQuery plugin by adding the following line in your project.

jQuery.decodeEntities = decodeEntities;

Here's a quick method that doesn't require creating a div, and decodes the "most common" HTML escaped chars:

function decodeHTMLEntities(text) {
    var entities = [
        ['amp', '&'],
        ['apos', '\''],
        ['#x27', '\''],
        ['#x2F', '/'],
        ['#39', '\''],
        ['#47', '/'],
        ['lt', '<'],
        ['gt', '>'],
        ['nbsp', ' '],
        ['quot', '"']
    ];

    for (var i = 0, max = entities.length; i < max; ++i) 
        text = text.replace(new RegExp('&'+entities[i][0]+';', 'g'), entities[i][1]);

    return text;
}

Like Robert K said, don't use jQuery.html().text() to decode html entities as it's unsafe because user input should never have access to the DOM. Read about XSS for why this is unsafe.

Instead try the Underscore.js utility-belt library which comes with escape and unescape methods:

_.escape(string)

Escapes a string for insertion into HTML, replacing &, <, >, ", `, and ' characters.

_.escape('Curly, Larry & Moe');
=> "Curly, Larry &amp; Moe"

_.unescape(string)

The opposite of escape, replaces &amp;, &lt;, &gt;, &quot;, &#96; and &#x27; with their unescaped counterparts.

_.unescape('Curly, Larry &amp; Moe');
=> "Curly, Larry & Moe"

To support decoding more characters, just copy the Underscore unescape method and add more characters to the map.


here is another version:

_x000D_
_x000D_
function convertHTMLEntity(text){_x000D_
    const span = document.createElement('span');_x000D_
_x000D_
    return text_x000D_
    .replace(/&[#A-Za-z0-9]+;/gi, (entity,position,text)=> {_x000D_
        span.innerHTML = entity;_x000D_
        return span.innerText;_x000D_
    });_x000D_
}_x000D_
_x000D_
console.log(convertHTMLEntity('Large &lt; &#163; 500'));
_x000D_
_x000D_
_x000D_


Injecting untrusted HTML into the page is dangerous as explained in How to decode HTML entities using jQuery?.

One alternative is to use a JavaScript-only implementation of PHP's html_entity_decode (from http://phpjs.org/functions/html_entity_decode:424). The example would then be something like:

var varTitle = html_entity_decode("Chris&apos; corner");

Examples related to javascript

need to add a class to an element How to make a variable accessible outside a function? Hide Signs that Meteor.js was Used How to create a showdown.js markdown extension Please help me convert this script to a simple image slider Highlight Anchor Links when user manually scrolls? Summing radio input values How to execute an action before close metro app WinJS javascript, for loop defines a dynamic variable name Getting all files in directory with ajax

Examples related to jquery

How to make a variable accessible outside a function? Jquery assiging class to th in a table Please help me convert this script to a simple image slider Highlight Anchor Links when user manually scrolls? Getting all files in directory with ajax Bootstrap 4 multiselect dropdown Cross-Origin Read Blocking (CORB) bootstrap 4 file input doesn't show the file name Jquery AJAX: No 'Access-Control-Allow-Origin' header is present on the requested resource how to remove json object key and value.?

Examples related to html

Embed ruby within URL : Middleman Blog Please help me convert this script to a simple image slider Generating a list of pages (not posts) without the index file Why there is this "clear" class before footer? Is it possible to change the content HTML5 alert messages? Getting all files in directory with ajax DevTools failed to load SourceMap: Could not load content for chrome-extension How to set width of mat-table column in angular? How to open a link in new tab using angular? ERROR Error: Uncaught (in promise), Cannot match any routes. URL Segment