UTF-8とShift-JISの文字コードを相互に変換する際、対応するコードが簡単な計算では求まらないことは周知のことと思います。
そこで、簡単に変換できるように変換表を作ってみることにします。
ちなみにVisual Studio 2015で作りました。
// stdafx.h : 標準のシステム インクルード ファイルのインクルード ファイル、または // 参照回数が多く、かつあまり変更されない、プロジェクト専用のインクルード ファイル // を記述します。 // #pragma once #include "targetver.h" #include <stdio.h> #include <tchar.h> // TODO: プログラムに必要な追加ヘッダーをここで参照してください #include <windows.h>
// utf8tocp932.cpp : コンソール アプリケーションのエントリ ポイントを定義します。 // #include "stdafx.h" typedef struct { char buf[7]; } utf8char; void utf8tocp932(unsigned const char *utf) { int lenghtUnicode = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, strlen((const char *)utf) + 1, NULL, 0); if (lenghtUnicode) { wchar_t* bufUnicode = new wchar_t[lenghtUnicode]; MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)utf, strlen((const char *)utf) + 1, bufUnicode, lenghtUnicode); int lengthSJis = WideCharToMultiByte(CP_THREAD_ACP, 0, bufUnicode, -1, NULL, 0, NULL, NULL); if (lengthSJis) { char* bufShiftJis = new char[lengthSJis]; BOOL usedDefaultChar; WideCharToMultiByte(CP_THREAD_ACP, WC_NO_BEST_FIT_CHARS, bufUnicode, lenghtUnicode + 1, bufShiftJis, lengthSJis, "??", &usedDefaultChar); if (!usedDefaultChar) { printf("{{"); for (int i = 0; strlen((const char *)utf) > i; i++) { printf("0x%02x,", utf[i]); } printf("}, {"); for (int i = 0; strlen(bufShiftJis) > i; i++) { printf("0x%02x,", (unsigned char)bufShiftJis[i]); } printf("}}\t// %s\n", bufShiftJis); } delete bufShiftJis; } delete bufUnicode; } } int main() { for (unsigned char chr1 = 0x20; 0x7f > chr1; chr1++) { unsigned char str[] = { chr1 , 0x00 }; utf8tocp932(str); } for (unsigned char chr1 = 0xC2; 0xef >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { unsigned char str[] = { chr1 , chr2 , 0x00 }; utf8tocp932(str); } } for (unsigned char chr2 = 0xa0; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { unsigned char str[] = { 0xe0 , chr2 , chr3 , 0x00 }; utf8tocp932(str); } } for (unsigned char chr1 = 0xe1; 0xef >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { unsigned char str[] = { chr1 , chr2, chr3 , 0x00 }; utf8tocp932(str); } } } for (unsigned char chr2 = 0x90; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { unsigned char 
str[] = { 0xf0 , chr2, chr3, chr4 , 0x00 }; utf8tocp932(str); } } } for (unsigned char chr1 = 0xf1; 0xf3 >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { unsigned char str[] = { chr1 , chr2, chr3, chr4 , 0x00 }; utf8tocp932(str); } } } } for (unsigned char chr2 = 0x80; 0x8f >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { unsigned char str[] = { 0xf4 , chr2, chr3, chr4 , 0x00 }; utf8tocp932(str); } } } for (unsigned char chr1 = 0xf5; 0xf7 >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { unsigned char str[] = { chr1 , chr2, chr3, chr4 , 0x00 }; utf8tocp932(str); } } } } for (unsigned char chr1 = 0xf8; 0xfb >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { for (unsigned char chr5 = 0x80; 0xbf >= chr5; chr5++) { unsigned char str[] = { chr1 , chr2, chr3, chr4, chr5 , 0x00 }; utf8tocp932(str); } } } } } for (unsigned char chr1 = 0xfc; 0xfd >= chr1; chr1++) { for (unsigned char chr2 = 0x80; 0xbf >= chr2; chr2++) { for (unsigned char chr3 = 0x80; 0xbf >= chr3; chr3++) { for (unsigned char chr4 = 0x80; 0xbf >= chr4; chr4++) { for (unsigned char chr5 = 0x80; 0xbf >= chr5; chr5++) { for (unsigned char chr6 = 0x80; 0xbf >= chr6; chr6++) { unsigned char str[] = { chr1 , chr2, chr3, chr4, chr5, chr6 , 0x00 }; utf8tocp932(str); } } } } } } return 0; }
参照
UTF8なstring入れたらShiftJISなstring出てくる関数作った
UTF-8