I am trying to print a Russian "ф" (U+0444 CYRILLIC SMALL LETTER EF) character, which is given a code of decimal 1092. Using C++, how can I print out this character? I would have thought something along the lines of the following would work, yet...
// The asker's original attempt, kept exactly as posted; it does not print the letter.
int main (){
// '1060' is a multi-character literal, not the number 1060, so its value is
// implementation-defined. Also note that 1060 is U+0424 (capital EF), whereas
// the code point named in the question, 1092, is U+0444 (small ef).
wchar_t f = '1060';
// The narrow stream has no character overload for wchar_t: before C++20 the
// value is printed as an integer, and since C++20 this overload is deleted.
cout << f << endl;
}
When compiling with -std=c++11, one can simply write:
// u8"..." produces a UTF-8 encoded string literal; \u0444 names the code point
// of the letter, so the source file's own encoding does not matter here.
// NOTE(review): from C++20 on, u8 literals have type const char8_t[], so this
// initialization of a const char* would need adjusting - confirm the target standard.
const char *s = u8"\u0444";
// Writes the UTF-8 bytes followed by a newline and flushes the stream.
cout << s << endl;
I needed to show the string in the UI as well as save it to an XML configuration file. The format specified above works for strings in C++; I would add that we can obtain the XML-compatible string for the special character by replacing "\u" with "&#x" and adding a ";" at the end.
For example:
C++ : "\u0444" --> XML : "&#x0444;"
If you use Windows (note, we are using printf(), not cout):
// Save this file as UTF-8 without signature (no BOM), so that the string
// literal below is stored as UTF-8 bytes.
#include <stdio.h>
#include <windows.h>

// Switches the Windows console output code page to UTF-8 and prints the
// Cyrillic small letter ef (U+0444) followed by a newline.
int main() {
    // 65001 is the Windows code page identifier for UTF-8.
    SetConsoleOutputCP(65001);
    // printf() passes the literal's bytes through unchanged.
    printf("ф\n");
}
Not Unicode but working - 1251 instead of UTF8:
// Save this file as Windows-1251, so that the string literal below is stored
// as a single Windows-1251 byte.
#include <iostream>
#include <windows.h>
using namespace std;

// Switches the Windows console output code page to Windows-1251 and prints the
// Cyrillic small letter ef followed by a newline.
int main() {
    // 1251 is the Windows code page identifier for Windows-1251 (Cyrillic).
    SetConsoleOutputCP(1251);
    cout << "ф" << endl;
}
Special thanks to the answer here for more-or-less the same question.
For me, all I needed was setlocale(LC_ALL, "en_US.UTF-8");
Then, I could use even raw wchar_t characters.
In Linux, I can just do:
// Source file saved as UTF-8: the literal's bytes are written unchanged.
std::cout << "ф";
I just copy-pasted characters from here and it didn't fail for at least the random sample that I tried on.
Ultimately, this is completely platform-dependent. Unicode support is, unfortunately, very poor in Standard C++. For GCC, you will have to make it a narrow string, as they use UTF-8, and Windows wants a wide string, and you must output to wcout.
// GCC
std::cout << "?";
// Windoze
wcout << L"?";
This code works in Linux (C++11, geany, g++ 7.4.0):
#include <iostream>
using namespace std;
int utf8_to_unicode(string utf8_code);
string unicode_to_utf8(int unicode);
// Demonstrates both conversion directions on four sample characters that need
// one, two, three and four UTF-8 bytes: U+0024, U+00A2, U+20AC and U+1F642.
int main()
{
    // Code point -> UTF-8, code points written in decimal.
    cout << unicode_to_utf8(36) << '\t';
    cout << unicode_to_utf8(162) << '\t';
    cout << unicode_to_utf8(8364) << '\t';
    cout << unicode_to_utf8(128578) << endl;

    // Same code points written in hexadecimal.
    cout << unicode_to_utf8(0x24) << '\t';
    cout << unicode_to_utf8(0xa2) << '\t';
    cout << unicode_to_utf8(0x20ac) << '\t';
    cout << unicode_to_utf8(0x1f642) << endl;

    // UTF-8 -> code point, characters typed directly (the source file must be
    // saved as UTF-8 for these literals to hold the intended bytes).
    cout << utf8_to_unicode("$") << '\t';
    cout << utf8_to_unicode("¢") << '\t';
    cout << utf8_to_unicode("€") << '\t';
    cout << utf8_to_unicode("🙂") << endl;

    // Same characters spelled out as explicit UTF-8 byte escapes.
    cout << utf8_to_unicode("\x24") << '\t';
    cout << utf8_to_unicode("\xc2\xa2") << '\t';
    cout << utf8_to_unicode("\xe2\x82\xac") << '\t';
    cout << utf8_to_unicode("\xf0\x9f\x99\x82") << endl;

    return 0;
}
// Decodes a string holding exactly one UTF-8 encoded character into its
// Unicode code point.
//
// utf8_code: the UTF-8 bytes of a single character (1 to 4 bytes).
//
// Returns the code point (0 .. 0x10FFFF) on success. An empty string yields 0.
// Returns -1 when the input is not exactly one well-formed UTF-8 sequence:
// invalid lead byte, length not matching the lead byte, bad continuation byte,
// overlong encoding, surrogate (U+D800..U+DFFF) or value above U+10FFFF.
int utf8_to_unicode(std::string utf8_code)
{
    const std::string::size_type size = utf8_code.size();
    if (size == 0)
        return 0;

    // Work on unsigned bytes so that values >= 0x80 are not sign-extended.
    const unsigned char lead = static_cast<unsigned char>(utf8_code[0]);

    std::string::size_type expected_size = 0; // length announced by the lead byte
    int code_point = 0;                       // payload bits gathered so far
    int smallest = 0;                         // smallest value this length may encode

    if (lead < 0x80) {                 // 0xxxxxxx
        expected_size = 1;
        code_point = lead;
        smallest = 0x0;
    } else if ((lead & 0xE0) == 0xC0) { // 110xxxxx
        expected_size = 2;
        code_point = lead & 0x1F;
        smallest = 0x80;
    } else if ((lead & 0xF0) == 0xE0) { // 1110xxxx
        expected_size = 3;
        code_point = lead & 0x0F;
        smallest = 0x800;
    } else if ((lead & 0xF8) == 0xF0) { // 11110xxx
        expected_size = 4;
        code_point = lead & 0x07;
        smallest = 0x10000;
    } else {
        return -1; // stray continuation byte or a 5/6-byte lead
    }

    if (size != expected_size)
        return -1;

    for (std::string::size_type i = 1; i < size; ++i) {
        const unsigned char byte = static_cast<unsigned char>(utf8_code[i]);
        if ((byte & 0xC0) != 0x80) // continuation bytes must look like 10xxxxxx
            return -1;
        code_point = (code_point << 6) | (byte & 0x3F);
    }

    const bool overlong = code_point < smallest;
    const bool surrogate = code_point >= 0xD800 && code_point <= 0xDFFF;
    if (overlong || surrogate || code_point > 0x10FFFF)
        return -1;

    return code_point;
}
// Encodes one Unicode code point as a UTF-8 byte string.
//
// unicode: the code point to encode.
//
// Returns the 1- to 4-byte UTF-8 encoding. Returns an empty string when the
// value is not an encodable Unicode scalar value: negative, above U+10FFFF,
// or a surrogate (U+D800..U+DFFF).
std::string unicode_to_utf8(int unicode)
{
    const bool is_surrogate = unicode >= 0xD800 && unicode <= 0xDFFF;
    if (unicode < 0 || unicode > 0x10FFFF || is_surrogate)
        return "";

    std::string s;
    if (unicode <= 0x7F) {
        // 0xxxxxxx
        s += static_cast<char>(unicode);
    } else if (unicode <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        s += static_cast<char>(0xC0 | (unicode >> 6));
        s += static_cast<char>(0x80 | (unicode & 0x3F));
    } else if (unicode <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        s += static_cast<char>(0xE0 | (unicode >> 12));
        s += static_cast<char>(0x80 | ((unicode >> 6) & 0x3F));
        s += static_cast<char>(0x80 | (unicode & 0x3F));
    } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        s += static_cast<char>(0xF0 | (unicode >> 18));
        s += static_cast<char>(0x80 | ((unicode >> 12) & 0x3F));
        s += static_cast<char>(0x80 | ((unicode >> 6) & 0x3F));
        s += static_cast<char>(0x80 | (unicode & 0x3F));
    }
    return s;
}
More:
'1060' is four characters, and won't compile under the standard. You should just treat the character as a number, if your wide characters match 1:1 with Unicode (check your locale settings).
// Stores the numeric value 1060 in a wide character and writes it, followed by
// a newline, to the wide output stream.
int main()
{
    const wchar_t wide_char = 1060;
    std::wcout << wide_char << std::endl;
}
Another solution in Linux:
// Prints each sample character three ways: typed directly, spelled as explicit
// UTF-8 byte escapes, and as the hexadecimal value of each stored byte,
// followed by the byte length of the string. The source file must be saved as
// UTF-8 so that each literal holds the multi-byte sequence indexed below.

// U+0424 CYRILLIC CAPITAL LETTER EF: two bytes in UTF-8 (d0 a4).
string a = "Ф";
cout << "Ф = \xd0\xa4 = " << hex
     << int(static_cast<unsigned char>(a[0]))
     << int(static_cast<unsigned char>(a[1])) << " (" << a.length() << "B)" << endl;

// U+221A SQUARE ROOT: three bytes in UTF-8 (e2 88 9a).
string b = "√";
cout << "√ = \xe2\x88\x9a = " << hex
     << int(static_cast<unsigned char>(b[0]))
     << int(static_cast<unsigned char>(b[1]))
     << int(static_cast<unsigned char>(b[2])) << " (" << b.length() << "B)" << endl;
Source: Stackoverflow.com