UNICODE/UTF-8與Shift-JIS文字代碼互相改變

發表時間：2004-08-05 19:41:57

IP:61.221.xxx.xxx 未訂閱

引言: http://homepage3.nifty.com/m-and-i/tips/jconex.htm UNICODE/UTF-8與Shift-JIS文字代碼互相改變(2003/11/30) Unicode(Little Endian/Big Endian)，UTF-8/8N的文字代碼檢查與Shift-JIS互相改變行為為了的單位。池田氏作品jconvert [Copyright(C) 1998 EarthWave Soft(IKEDA Takahiro)]的使用前提戶下擴張單位作為使用。注解１） UTF8Windows98以後被追加的API使用正在做為了Windows95動作彎曲與想和間沙灘。注解２） UNICODE１字節文字也２字節擴張有、字符串公允(在~~中)#0含為了String模具接待不能。為此、UNICODE從SJIS改變時候PWideChar模具，SJIS從UNICODE改變時候在……中ver渡的TMemoryStream使用者。改變了應該……文件保存事情思索、MemoryStream寫上領了日本影片樂音就是與想和間沙灘。 sjisToUniLE(ms, s); ms.SaveToFile(FileName); 完畢做完．．．使用時候、Uses季節jconvert與jconvex追加請。jconvert的InCodeCheck的代替InCodeCheckEx使用於是、文字代碼Unicode嗎UTF-8那個核對也能象結。合理認真核對正在做的是UTF-8隻有、Unicode與UTF-8N前頭的BOM核對正在做隻有的因為錯誤啟動的可能性有(^^; 從這裡…情況之下jconvex.pas作為保存、程序庫路徑的來往了文件夾保存請。

unit jconvex;

{ jconvex - extension unit for jconvert.

  Adds detection of Unicode (UTF-16 little/big endian), UTF-8 (with BOM) and
  UTF-8N (without BOM), plus conversion between those encodings and the
  single/multi-byte "SJIS" string form.

  Notes:
    * The UTF-8 routines call MultiByteToWideChar / WideCharToMultiByte with
      CP_UTF8; per the accompanying article this is only expected to work on
      Windows 98 and later.
    * UTF-16 text contains #0 bytes, so Unicode input is taken as PWideChar
      and Unicode output is written to a caller-supplied TMemoryStream
      instead of being returned as a string.
    * The UTF-16 and UTF-8-with-BOM detection only looks at the leading BOM,
      so false positives are possible. Only BOM-less UTF-8 is actually
      scanned.
    * NOTE(review): the code treats `string`/PChar as byte strings, i.e. it
      presumably targets a pre-Unicode Delphi (AnsiString) - confirm.
    * NOTE(review): WideCharToString / StringToWideChar presumably convert
      via the system ANSI code page, so "SJIS" holds only on a Japanese
      system - confirm. }

interface

uses
  Windows, SysUtils, jconvert, Classes;

const
  // Input encoding codes returned by InCodeCheckEx.
  // NOTE(review): presumably chosen to continue after jconvert's own codes.
  UNILE_IN  = 7;  // Unicode, little endian (Intel CPU)
  UNIBE_IN  = 8;  // Unicode, big endian
  UTF8_IN   = 9;  // UTF-8 (UTF-8N with a leading BOM)
  UTF8N_IN  = 10; // UTF-8N (no BOM)

  // Output encoding codes (same numbering as the input codes).
  UNILE_OUT = 7;
  UNIBE_OUT = 8;
  UTF8_OUT  = 9;
  UTF8N_OUT = 10;

// Extended character-code detection.
function InCodeCheckEx(const s: string): integer;
// Unicode (little endian) -> SJIS
function uniLETosjis(const s: PWideChar): string;
// Unicode (big endian) -> SJIS
function uniBETosjis(const s: PWideChar): string;
// UTF-8 (with BOM) -> SJIS
function Utf8Tosjis(const s: String): string;
// UTF-8N (no BOM) -> SJIS
function Utf8NTosjis(const s: String): string;
// SJIS -> Unicode (little endian), written to ms with a BOM
procedure sjisToUniLE(var ms: TMemoryStream; const s: string);
// SJIS -> Unicode (big endian), written to ms with a BOM
procedure sjisToUniBE(var ms: TMemoryStream; const s: string);
// SJIS -> UTF-8 (with BOM)
function sjisToUtf8(const s: string): string;
// SJIS -> UTF-8N (no BOM)
function sjisToUtf8N(const s: string): string;

implementation

{ Extended character-code detection.

  Returns BINARY for an empty string, UNILE_IN / UNIBE_IN when the data
  starts with a UTF-16 BOM, UTF8_IN when it starts with the UTF-8 BOM, and
  UTF8N_IN when the scanned prefix contains at least one well-formed 3-byte
  UTF-8 sequence and nothing that contradicts UTF-8. In every other case the
  result of jconvert's InCodeCheck is returned.

  The BOM-less scan accepts only ASCII bytes and 3-byte sequences
  (lead byte $E0..$EF). Any other non-ASCII lead byte ($80..$DF, $F0..$FF)
  aborts the scan, so text using 2- or 4-byte UTF-8 sequences is handed to
  InCodeCheck. }
function InCodeCheckEx(const s: string): integer;
var
  index, c, size: Integer;
  utfk, secondOk: Boolean;
begin
  size := Length(s);
  if size = 0 then
  begin
    Result := BINARY;
    Exit;
  end;

  { UTF-16 check: only the leading BOM is examined, so this can misfire. }
  if size >= 2 then
  begin
    if (s[1] = #$FF) and (s[2] = #$FE) then
    begin
      Result := UNILE_IN;
      Exit;
    end;
    if (s[1] = #$FE) and (s[2] = #$FF) then
    begin
      Result := UNIBE_IN;
      Exit;
    end;
  end;

  { UTF-8 with BOM: again only the BOM is examined. The bound is ">= 3" so
    that a string consisting of nothing but the BOM is recognised too. }
  if size >= 3 then
  begin
    if (s[1] = #$EF) and (s[2] = #$BB) and (s[3] = #$BF) then
    begin
      Result := UTF8_IN;
      Exit;
    end;
  end;

  { UTF-8 without BOM: scan the prefix. The "index < size - 4" bound keeps
    index + 2 inside the string, so the trail-byte reads below are safe. }
  index := 1;
  utfk := False;
  while (index <= STRICT_CHECK_LEN) and (index < size - 4) do
  begin
    c := Ord(s[index]);
    if c > $7F then
    begin
      { Only 3-byte lead bytes are accepted. }
      if not (c in [$E0..$EF]) then
      begin
        utfk := False;
        Break;
      end;
      { After $E0 the second byte must be $A0..$BF; otherwise $80..$BF. }
      if c = $E0 then
        secondOk := Ord(s[index + 1]) in [$A0..$BF]
      else
        secondOk := Ord(s[index + 1]) in [$80..$BF];
      if not (secondOk and (Ord(s[index + 2]) in [$80..$BF])) then
      begin
        utfk := False;
        Break;
      end;
      utfk := True;
      Inc(index, 2); // skip the two trail bytes
    end;
    Inc(index);
  end;

  if utfk then
    Result := UTF8N_IN          // multi-byte characters found: UTF-8N
  else
    Result := InCodeCheck(s);   // neither Unicode nor UTF-8: ask jconvert
end;

{ Converts a NUL-terminated little-endian UTF-16 string to a byte string
  using WideCharToString. }
function UniLETosjis(const s: PWideChar): string;
begin
  Result := WideCharToString(s);
end;

{ Converts a NUL-terminated big-endian UTF-16 string to a byte string.

  The byte order of every code unit is swapped into a private copy, which is
  then converted with WideCharToString. The caller's buffer is left
  untouched (the previous implementation swapped it in place despite the
  const parameter). Returns '' for nil or empty input. }
function UniBETosjis(const s: PWideChar): string;
var
  Swapped: WideString;
  Len, i: Integer;
  w: Word;
begin
  Result := '';
  if s = nil then
    Exit;

  { A zero code unit is zero in either byte order, so it marks the end. }
  Len := 0;
  while s[Len] <> #0 do
    Inc(Len);
  if Len = 0 then
    Exit;

  SetLength(Swapped, Len);
  for i := 0 to Len - 1 do
  begin
    w := Word(s[i]);
    Swapped[i + 1] := WideChar((w shr 8) or ((w and $FF) shl 8));
  end;
  Result := WideCharToString(PWideChar(Swapped));
end;

{ Converts s to UTF-16 little endian and writes BOM ($FF $FE) plus the
  converted text (without terminator) to ms at its current position.

  Raises Exception when ms is not assigned. }
procedure sjisToUniLE(var ms: TMemoryStream; const s: string);
const
  BOM_LE: array[0..1] of Byte = ($FF, $FE);
var
  PWs: PWideChar;
  Capacity: Integer;
begin
  if not Assigned(ms) then
    raise Exception.Create('無效的MemoryStream.');

  { One wide character per input byte is the worst case; +1 for the
    terminator. StringToWideChar takes the size in characters, not bytes. }
  Capacity := Length(s) + 1;
  PWs := AllocMem(Capacity * SizeOf(WideChar));
  try
    StringToWideChar(s, PWs, Capacity);
    ms.Write(BOM_LE, SizeOf(BOM_LE));
    ms.Write(PWs^, Length(PWs) * SizeOf(WideChar));
  finally
    FreeMem(PWs);
  end;
end;

{ Converts s to UTF-16 big endian and writes BOM ($FE $FF) plus the
  converted text (without terminator) to ms at its current position.

  Raises Exception when ms is not assigned. }
procedure sjisToUniBE(var ms: TMemoryStream; const s: string);
const
  BOM_BE: array[0..1] of Byte = ($FE, $FF);
var
  PWs: PWideChar;
  Capacity, n: Integer;
  w: Word;
begin
  if not Assigned(ms) then
    raise Exception.Create('無效的MemoryStream.');

  Capacity := Length(s) + 1;
  PWs := AllocMem(Capacity * SizeOf(WideChar));
  try
    StringToWideChar(s, PWs, Capacity);

    { Swap the two bytes of every converted code unit; n ends up as the
      number of code units. }
    n := 0;
    while PWs[n] <> #0 do
    begin
      w := Word(PWs[n]);
      PWs[n] := WideChar((w shr 8) or ((w and $FF) shl 8));
      Inc(n);
    end;

    ms.Write(BOM_BE, SizeOf(BOM_BE));
    ms.Write(PWs^, n * SizeOf(WideChar));
  finally
    FreeMem(PWs);
  end;
end;

{ Converts BOM-less UTF-8 to a byte string via UTF-16.

  Raises Exception when MultiByteToWideChar reports failure. }
function Utf8NTosjis(const s: string): string;
var
  Len: integer;
  OutStr: PWideChar;
  SIn, SOut: string;
begin
  Result := '';
  { Append NULs so the converted wide string is always terminated
    (keeps trailing garbage out of the result). }
  SIn := s + #0#0;
  Len := MultiByteToWideChar(CP_UTF8, 0, PChar(SIn), Length(SIn), nil, 0);
  if Len = 0 then
    raise Exception.Create('UTF8的字符串改變失敗了.');

  { Len is a count of wide characters while AllocMem takes bytes, hence the
    multiplication by the character size. }
  OutStr := AllocMem(Len * SizeOf(WideChar));
  try
    MultiByteToWideChar(CP_UTF8, 0, PChar(SIn), Length(SIn), OutStr, Len);
    WideCharToStrVar(OutStr, SOut);
    Result := SOut;
  finally
    FreeMem(OutStr);
  end;
end;

{ Converts UTF-8 with a leading BOM to a byte string.

  The BOM is removed only when it is really there, so BOM-less input no
  longer loses its first three bytes. }
function Utf8Tosjis(const s: string): string;
var
  s2: string;
begin
  s2 := s;
  if Copy(s2, 1, 3) = #$EF#$BB#$BF then
    Delete(s2, 1, 3);
  Result := Utf8NTosjis(s2);
end;

{ Converts a byte string to BOM-less UTF-8 via UTF-16.

  The required output size is queried from WideCharToMultiByte first; a
  fixed 2 * Length(s) + 2 byte buffer is too small when a one-byte input
  character needs three UTF-8 bytes. Returns '' when the conversion API
  reports failure. }
function SjisToUtf8N(const s: string): string;
var
  WideCap, OutLen: Integer;
  InStr: PWideChar;
  OutStr: PChar;
begin
  Result := '';
  WideCap := Length(s) + 1; // worst case one wide char per byte + terminator
  InStr := AllocMem(WideCap * SizeOf(WideChar));
  try
    StringToWideChar(s, InStr, WideCap);

    { -1 = input is NUL-terminated; the returned size includes the NUL. }
    OutLen := WideCharToMultiByte(CP_UTF8, 0, InStr, -1, nil, 0, nil, nil);
    if OutLen = 0 then
      Exit;

    OutStr := AllocMem(OutLen);
    try
      WideCharToMultiByte(CP_UTF8, 0, InStr, -1, OutStr, OutLen, nil, nil);
      Result := OutStr;
    finally
      FreeMem(OutStr);
    end;
  finally
    FreeMem(InStr);
  end;
end;

{ Converts a byte string to UTF-8 and prefixes the UTF-8 BOM. }
function SjisToUtf8(const s: string): string;
begin
  Result := #$EF#$BB#$BF + SjisToUtf8N(s);
end;

end.