I am processing xhtml using javascript. I am getting the text content for a div node by concatenating the nodeValue of all child nodes where nodeType == Node.TEXT_NODE.
The resulting string sometimes contains a non-breaking space entity. How do I replace this with a regular space character?
My div looks like this...
<div><b>Expires On</b> Sep 30, 2009 06:30&nbsp;AM</div>
The following suggestions found on the web did not work:
var cleanText = text.replace(/^\xa0*([^\xa0]*)\xa0*$/g,"");
var cleanText = replaceHtmlEntities(text);
// Decodes the named HTML entities nbsp, amp, quot, lt and gt in a string.
// The lookup table and the pattern live in a closure so they are created once.
var replaceHtmlEntites = (function() {
    var entityChars = {
        "nbsp": " ",
        "amp": "&",
        "quot": "\"",
        "lt": "<",
        "gt": ">"
    };
    var entityPattern = /&(nbsp|amp|quot|lt|gt);/g;

    // Called by replace() for every match; the second argument is the
    // captured entity name without the surrounding "&" and ";".
    function decodeEntity(wholeMatch, entityName) {
        return entityChars[entityName];
    }

    return function(s) {
        return s.replace(entityPattern, decodeEntity);
    };
})();
Any suggestions?
This question is related to
javascript
regex
html-entities
That first line is pretty messed up. It only needs to be:
var cleanText = text.replace(/\xA0/g,' ');
That should be all you need.
If you only need to replace &nbsp; then you can use a far simpler regex:
// Replace every literal "&nbsp;" entity in the text with a regular space.
var textWithNBSpaceReplaced = originalText.replace(/&nbsp;/g, ' ');
Also, there is a typo in your div example: it says &nnbsp; instead of &nbsp;.
I think when you define a function with "var foo = function() {...};", the function is only defined after that line. In other words, try this:
// Decodes the named HTML entities nbsp, amp, quot, lt and gt in a string.
// The lookup table and the pattern live in a closure so they are created once.
var replaceHtmlEntites = (function() {
    var entityChars = {
        "nbsp": " ",
        "amp": "&",
        "quot": "\"",
        "lt": "<",
        "gt": ">"
    };
    var entityPattern = /&(nbsp|amp|quot|lt|gt);/g;

    // Called by replace() for every match; the second argument is the
    // captured entity name without the surrounding "&" and ";".
    function decodeEntity(wholeMatch, entityName) {
        return entityChars[entityName];
    }

    return function(s) {
        return s.replace(entityPattern, decodeEntity);
    };
})();
var cleanText = text.replace(/^\xa0*([^\xa0]*)\xa0*$/g,"");
cleanText = replaceHtmlEntities(text);
Edit: Also, only use "var" the first time you declare a variable (you're using it twice on the cleanText variable).
Edit 2: The problem is the spelling of the function name. You have "var replaceHtmlEntites =". It should be "var replaceHtmlEntities ="
i used this, and it worked:
var cleanText = text.replace(/&nbsp;/g,"");
// Adds a method to every string that decodes the named HTML entities
// nbsp, amp, quot, lt and gt. It must be assigned before it is first called:
// a function expression stored on the prototype only exists once this
// statement has run.
String.prototype.replaceHtmlEntites = function() {
    var translate_re = /&(nbsp|amp|quot|lt|gt);/g;
    var translate = {"nbsp": " ","amp" : "&","quot": "\"","lt" : "<","gt" : ">"};
    // String(this) yields a primitive string whether or not the code runs in strict mode.
    return String(this).replace(translate_re, function(match, entity) {
        return translate[entity];
    });
};

// Usage: the entities in the literal are decoded to  " &<>
var text = "&quot;&nbsp;&amp;&lt;&gt;";
text = text.replaceHtmlEntites();
try this.....this worked for me
Removes everything between & and ; which all such symbols have, if you just want to get rid of them.
// Match one entity at a time. A greedy /&.*;/ would also delete all the
// ordinary text lying between the first "&" and the last ";" in the string.
text.replace(/&[^&;\s]+;/g,'');
for me replace doesn't work... try this code:
// Replace every "&quot;" entity with a double quote without using replace().
str = str.split("&quot;").join('"');
Source: Stackoverflow.com