utf8_helper.cpp
changeset 0 0aa9866d751c
child 10 15fd256d4adb
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/utf8_helper.cpp	Thu Apr 16 20:43:31 2015 +0200
     1.3 @@ -0,0 +1,68 @@
     1.4 +#include "stdafx.h"
     1.5 +#include "utf8_helper.h"
     1.6 +
     1.7 +using namespace std;
     1.8 +
     1.9 +string utf8_string(wstring wstr, NORM_FORM norm) {
    1.10 +	if (wstr.size() == 0)
    1.11 +		return string();
    1.12 +
    1.13 +	wstring _wstr_normalized;
    1.14 +
    1.15 +	if (norm == NormalizationOther)
    1.16 +		_wstr_normalized = wstr;
    1.17 +	else {
    1.18 +		int size = NormalizeString(norm, wstr.c_str(), -1, NULL, 0);
    1.19 +		assert(size > 0);
    1.20 +		if (size > 0) {
    1.21 +			wchar_t *buf = new wchar_t[size];
    1.22 +			NormalizeString(norm, wstr.c_str(), -1, buf, size);
    1.23 +			_wstr_normalized = buf;
    1.24 +			delete[] buf;
    1.25 +		}
    1.26 +		else
    1.27 +			throw out_of_range("input wstring is not valid while normalizing.");
    1.28 +	}
    1.29 +	string result;
    1.30 +
    1.31 +	int size = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, _wstr_normalized.c_str(), -1, NULL, 0, NULL, NULL);
    1.32 +	assert(size);
    1.33 +	if (size) {
    1.34 +		char *buf = new char[size];
    1.35 +		WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, _wstr_normalized.c_str(), -1, buf, size, NULL, NULL);
    1.36 +		result = buf;
    1.37 +		delete[] buf;
    1.38 +	}
    1.39 +	else
    1.40 +		throw out_of_range("input wstring is not valid while converting UTF-16 to UTF-8.");
    1.41 +
    1.42 +	return result;
    1.43 +}
    1.44 +
    1.45 +string utf8_string(BSTR bstr, NORM_FORM norm) {
    1.46 +	return utf8_string((wstring)(wchar_t *)_bstr_t(bstr, true), norm);
    1.47 +}
    1.48 +
    1.49 +wstring utf16_string(string str) {
    1.50 +	if (str.size() == 0)
    1.51 +		return wstring();
    1.52 +
    1.53 +	wstring result;
    1.54 +
    1.55 +	int size = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str.c_str(), -1, NULL, 0);
    1.56 +	assert(size);
    1.57 +	if (size) {
    1.58 +		wchar_t * buf = new wchar_t[size];
    1.59 +		MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, str.c_str(), -1, buf, size);
    1.60 +		result = buf;
    1.61 +		delete[] buf;
    1.62 +	}
    1.63 +	else
    1.64 +		throw out_of_range("input string is not valid while converting UTF-8 to UTF-16.");
    1.65 +
    1.66 +	return result;
    1.67 +}
    1.68 +
    1.69 +_bstr_t utf16_bstr(string str) {
    1.70 +	return _bstr_t(utf16_string(str).c_str());
    1.71 +}