If you have UTF8, use this (actually works with SVG source), like:
btoa(unescape(encodeURIComponent(str)))
example:
var imgsrc = 'data:image/svg+xml;base64,' + btoa(unescape(encodeURIComponent(markup)));
var img = new Image(1, 1); // width, height values are optional params
img.src = imgsrc;
If you need to decode that base64, use this:
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Example:
var str = "äöüÄÖÜçéèñ";
var b64 = window.btoa(unescape(encodeURIComponent(str)))
console.log(b64);
var str2 = decodeURIComponent(escape(window.atob(b64)));
console.log(str2);
Note: if you need to get this to work in mobile-safari, you might need to strip all the white-space from the base64 data...
function b64_to_utf8( str ) {
str = str.replace(/\s/g, '');
return decodeURIComponent(escape(window.atob( str )));
}
2017 Update
This problem has been bugging me again.
The simple truth is, atob doesn't really handle UTF8-strings - it's ASCII only.
Also, I wouldn't use bloatware like js-base64.
But webtoolkit does have a small, nice and very maintainable implementation:
/**
*
* Base64 encode / decode
* http://www.webtoolkit.info
*
**/
var Base64 = {
// private property
_keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
// public method for encoding
, encode: function (input)
{
var output = "";
var chr1, chr2, chr3, enc1, enc2, enc3, enc4;
var i = 0;
input = Base64._utf8_encode(input);
while (i < input.length)
{
chr1 = input.charCodeAt(i++);
chr2 = input.charCodeAt(i++);
chr3 = input.charCodeAt(i++);
enc1 = chr1 >> 2;
enc2 = ((chr1 & 3) << 4) | (chr2 >> 4);
enc3 = ((chr2 & 15) << 2) | (chr3 >> 6);
enc4 = chr3 & 63;
if (isNaN(chr2))
{
enc3 = enc4 = 64;
}
else if (isNaN(chr3))
{
enc4 = 64;
}
output = output +
this._keyStr.charAt(enc1) + this._keyStr.charAt(enc2) +
this._keyStr.charAt(enc3) + this._keyStr.charAt(enc4);
} // Whend
return output;
} // End Function encode
// public method for decoding
,decode: function (input)
{
var output = "";
var chr1, chr2, chr3;
var enc1, enc2, enc3, enc4;
var i = 0;
input = input.replace(/[^A-Za-z0-9\+\/\=]/g, "");
while (i < input.length)
{
enc1 = this._keyStr.indexOf(input.charAt(i++));
enc2 = this._keyStr.indexOf(input.charAt(i++));
enc3 = this._keyStr.indexOf(input.charAt(i++));
enc4 = this._keyStr.indexOf(input.charAt(i++));
chr1 = (enc1 << 2) | (enc2 >> 4);
chr2 = ((enc2 & 15) << 4) | (enc3 >> 2);
chr3 = ((enc3 & 3) << 6) | enc4;
output = output + String.fromCharCode(chr1);
if (enc3 != 64)
{
output = output + String.fromCharCode(chr2);
}
if (enc4 != 64)
{
output = output + String.fromCharCode(chr3);
}
} // Whend
output = Base64._utf8_decode(output);
return output;
} // End Function decode
// private method for UTF-8 encoding
,_utf8_encode: function (string)
{
var utftext = "";
string = string.replace(/\r\n/g, "\n");
for (var n = 0; n < string.length; n++)
{
var c = string.charCodeAt(n);
if (c < 128)
{
utftext += String.fromCharCode(c);
}
else if ((c > 127) && (c < 2048))
{
utftext += String.fromCharCode((c >> 6) | 192);
utftext += String.fromCharCode((c & 63) | 128);
}
else
{
utftext += String.fromCharCode((c >> 12) | 224);
utftext += String.fromCharCode(((c >> 6) & 63) | 128);
utftext += String.fromCharCode((c & 63) | 128);
}
} // Next n
return utftext;
} // End Function _utf8_encode
// private method for UTF-8 decoding
,_utf8_decode: function (utftext)
{
var string = "";
var i = 0;
var c, c1, c2, c3;
c = c1 = c2 = 0;
while (i < utftext.length)
{
c = utftext.charCodeAt(i);
if (c < 128)
{
string += String.fromCharCode(c);
i++;
}
else if ((c > 191) && (c < 224))
{
c2 = utftext.charCodeAt(i + 1);
string += String.fromCharCode(((c & 31) << 6) | (c2 & 63));
i += 2;
}
else
{
c2 = utftext.charCodeAt(i + 1);
c3 = utftext.charCodeAt(i + 2);
string += String.fromCharCode(((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
i += 3;
}
} // Whend
return string;
} // End Function _utf8_decode
}
https://www.fileformat.info/info/unicode/utf8.htm
For any character equal to or below 127 (hex 0x7F), the UTF-8 representation is one byte. It is just the lowest 7 bits of the full unicode value. This is also the same as the ASCII value.
For characters equal to or below 2047 (hex 0x07FF), the UTF-8 representation is spread across two bytes. The first byte will have the two high bits set and the third bit clear (i.e. 0xC2 to 0xDF). The second byte will have the top bit set and the second bit clear (i.e. 0x80 to 0xBF).
For all characters equal to or greater than 2048 but less that 65535 (0xFFFF), the UTF-8 representation is spread across three bytes.