本文提供一个用 Java 实现的中文繁简体字符互换工具(zip 包),并对其主要源代码的实现进行说明。
繁简体(GB <=> Big5)中文字符转换的实现原理很简单:根据两种码表的编码规则,建立两者之间的字符对应关系表(映射表);程序读取该映射表,查出每个字符在另一种编码方式下对应的字节编码,然后逐字符(每个中文字符占两个字节)地替换内容。
主功能实现的 GB2Big5.java 源代码如下:
package net.java2000.tools;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

/**
 * Converts Chinese text between the GB (GBK/GB2312) and Big5 double-byte
 * encodings by table lookup.
 *
 * <p>Two table resources are required: a GB-to-Big5 table and a Big5-to-GB
 * table. Both share one layout: for a double-byte character whose unsigned
 * lead byte is {@code h} and unsigned trail byte is {@code l}, the two
 * replacement bytes are stored at offset {@code (h - 160) * 510 + (l - 1) * 2}.
 * Entries that are found to be wrong can be corrected with
 * {@link #resetBig5Char(String, String)} and
 * {@link #resetGbChar(String, String)}.
 *
 * <p>The class is a lazily created singleton; obtain it through
 * {@link #getInstance()} or {@link #getInstance(String, String)}.
 */
public class GB2Big5 {
    /** Byte ('-') emitted for each half of a character that has no table entry. */
    private static final byte UNMAPPED = 45;

    /** Bytes reserved per lead byte in a table: 255 trail positions, two bytes each. */
    private static final int ROW_SIZE = 510;

    /** Lead-byte value subtracted before a table row offset is computed. */
    private static final int ROW_BASE = 160;

    private static GB2Big5 pInstance = null;

    private String s_big5TableFile = null;

    private String s_gbTableFile = null;

    private byte[] b_big5Table = null;

    private byte[] b_gbTable = null;

    /**
     * Loads both conversion tables from the given classpath resources.
     *
     * @param sgbTableFile   resource name of the GB-to-Big5 table
     * @param sbig5TableFile resource name of the Big5-to-GB table
     * @throws NullPointerException if either table cannot be loaded
     */
    private GB2Big5(String sgbTableFile, String sbig5TableFile) throws NullPointerException {
        s_big5TableFile = sbig5TableFile;
        s_gbTableFile = sgbTableFile;
        b_gbTable = getBytesFromFile(sgbTableFile);
        b_big5Table = getBytesFromFile(sbig5TableFile);
        if (null == b_gbTable) {
            throw new NullPointerException("No gb table can be load");
        }
        if (null == b_big5Table) {
            throw new NullPointerException("No big5 table can be load");
        }
    }

    /**
     * Returns the shared converter, creating it from the default table
     * resources on first use.
     *
     * @return the shared instance, or {@code null} if the tables could not be loaded
     */
    public static synchronized GB2Big5 getInstance() {
        return getInstance("/net/java2000/tools/gb-big5.table", "/net/java2000/tools/big5-gb.table");
    }

    /**
     * Returns the shared converter, creating it from the given table resources
     * if no instance exists yet. Once an instance exists the arguments are
     * ignored and the existing instance is returned.
     *
     * @param sgbTableFile   resource name of the GB-to-Big5 table
     * @param sbig5TableFile resource name of the Big5-to-GB table
     * @return the shared instance, or {@code null} if creation failed (the
     *         failure is printed to {@code System.err})
     */
    public static synchronized GB2Big5 getInstance(String sgbTableFile, String sbig5TableFile) {
        if (null == pInstance) {
            try {
                pInstance = new GB2Big5(sgbTableFile, sbig5TableFile);
            } catch (Exception e) {
                System.err.println(e.toString());
                pInstance = null;
            }
        }
        return pInstance;
    }

    /**
     * Replaces the Big5 bytes stored for each double-byte character of
     * {@code gbChar} with the bytes at the same positions of {@code big5Char},
     * then writes the GB-to-Big5 table back to its file. Intended for
     * correcting wrong mappings.
     *
     * <p>NOTE(review): the table is written to the same name it was loaded
     * from as a classpath resource, now used as a file-system path - confirm
     * this resolves to the intended file.
     *
     * @param gbChar   the GB character(s) whose mapping is corrected
     * @param big5Char the Big5 replacement character(s), position for position
     * @throws Exception if an index falls outside the table or the replacement
     *                   bytes, or if the table cannot be written
     */
    protected synchronized void resetBig5Char(String gbChar, String big5Char) throws Exception {
        // Encode directly; round-tripping through the platform charset corrupts
        // the bytes on any platform whose default charset is not GBK/Big5.
        byte[] key = gbChar.getBytes("GBK");
        byte[] replacement = big5Char.getBytes("BIG5");
        patchTable(b_gbTable, key, replacement);
        writeTable(s_gbTableFile, b_gbTable);
    }

    /**
     * Replaces the GB bytes stored for each double-byte character of
     * {@code big5Char} with the bytes at the same positions of {@code gbChar},
     * then writes the Big5-to-GB table back to its file. Intended for
     * correcting wrong mappings.
     *
     * <p>NOTE(review): the table is written to the same name it was loaded
     * from as a classpath resource, now used as a file-system path - confirm
     * this resolves to the intended file.
     *
     * @param big5Char the Big5 character(s) whose mapping is corrected
     * @param gbChar   the GB replacement character(s), position for position
     * @throws Exception if an index falls outside the table or the replacement
     *                   bytes, or if the table cannot be written
     */
    protected synchronized void resetGbChar(String big5Char, String gbChar) throws Exception {
        byte[] replacement = gbChar.getBytes("GBK");
        byte[] key = big5Char.getBytes("BIG5");
        patchTable(b_big5Table, key, replacement);
        writeTable(s_big5TableFile, b_big5Table);
    }

    /**
     * Converts a string to Big5 bytes by encoding it as GBK and replacing every
     * double-byte character through the GB-to-Big5 table. Single-byte (ASCII)
     * content is passed through unchanged; characters without a table entry
     * become {@code "--"}.
     *
     * <p>The byte pair (0xA1, 0x40) is not looked up; it is emitted as
     * (0xA1, 0xA1). NOTE(review): 0xA1 0x40 is the Big5 full-width space and
     * 0xA1 0xA1 the GB one, so this special case and the one in
     * {@link #big52gb(String)} look swapped - confirm against the tables
     * before changing.
     *
     * @param inStr the text to convert; may be {@code null} or empty
     * @return the Big5-encoded bytes; an empty array for {@code null}/empty input
     * @throws Exception if the GBK charset is unavailable
     */
    public byte[] gb2big5(String inStr) throws Exception {
        if (null == inStr || inStr.length() <= 0) {
            return new byte[0];
        }
        byte[] text = inStr.getBytes("GBK");
        convertInPlace(text, b_gbTable, 64, (byte) 161, (byte) 161);
        return text;
    }

    /**
     * Converts a string to its GB form by encoding it as Big5, replacing every
     * double-byte character through the Big5-to-GB table, and decoding the
     * result as GBK. Single-byte (ASCII) content is passed through unchanged;
     * characters without a table entry become {@code "--"}.
     *
     * <p>The byte pair (0xA1, 0xA1) is not looked up; it is emitted as
     * (0xA1, 0x40). See the review note on {@link #gb2big5(String)}.
     *
     * @param inStr the text to convert; may be {@code null} or empty
     * @return the converted text; {@code ""} for {@code null}/empty input
     * @throws Exception if the Big5 or GBK charset is unavailable
     */
    public String big52gb(String inStr) throws Exception {
        if (null == inStr || inStr.length() <= 0) {
            return "";
        }
        byte[] text = inStr.getBytes("BIG5");
        convertInPlace(text, b_big5Table, 161, (byte) 161, (byte) 64);
        // The bytes are now GB codes, so decode them as such rather than with
        // whatever the platform default charset happens to be.
        return new String(text, "GBK");
    }

    /** Returns the table offset of the entry for unsigned lead byte h and trail byte l. */
    private static int tableOffset(int h, int l) {
        return (h - ROW_BASE) * ROW_SIZE + (l - 1) * 2;
    }

    /** Returns table[index], or UNMAPPED when index lies outside the table. */
    private static byte lookup(byte[] table, int index) {
        if (index < 0 || index >= table.length) {
            return UNMAPPED;
        }
        return table[index];
    }

    /**
     * Rewrites every double-byte character of text in place using table. A
     * character starts at any byte with the high bit set and spans two bytes.
     * The pair (0xA1, spaceTrail) bypasses the table and becomes
     * (spaceOutHigh, spaceOutLow).
     */
    private static void convertInPlace(byte[] text, byte[] table, int spaceTrail,
            byte spaceOutHigh, byte spaceOutLow) {
        int last = text.length - 1;
        for (int i = 0; i < last; i++) {
            if (text[i] >= 0) {
                continue; // single-byte character: leave as is
            }
            int h = text[i] & 0xFF;
            int l = text[i + 1] & 0xFF;
            if (h == 161 && l == spaceTrail) {
                text[i] = spaceOutHigh;
                text[i + 1] = spaceOutLow;
            } else {
                int p = tableOffset(h, l);
                text[i] = lookup(table, p);
                text[i + 1] = lookup(table, p + 1);
            }
            i++; // skip the trail byte just handled
        }
    }

    /**
     * Stores, for every double-byte character of key, the two bytes found at
     * the same positions of replacement into table. The pair (0xA1, 0x40) is
     * skipped.
     */
    private static void patchTable(byte[] table, byte[] key, byte[] replacement) {
        int last = key.length - 1;
        for (int i = 0; i < last; i++) {
            if (key[i] >= 0) {
                continue;
            }
            int h = key[i] & 0xFF;
            int l = key[i + 1] & 0xFF;
            if (!(h == 161 && l == 64)) {
                int p = tableOffset(h, l);
                table[p] = replacement[i];
                table[p + 1] = replacement[i + 1];
            }
            i++;
        }
    }

    /** Writes the whole table to fileName, closing the stream even on failure. */
    private static void writeTable(String fileName, byte[] table) throws IOException {
        BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream(fileName));
        try {
            out.write(table, 0, table.length);
        } finally {
            out.close();
        }
    }

    /**
     * Reads a classpath resource into a byte array.
     *
     * @param inFileName resource name, resolved relative to this class
     * @return the resource content, or {@code null} if it is missing or cannot be read
     */
    private static byte[] getBytesFromFile(String inFileName) {
        InputStream in = null;
        try {
            in = GB2Big5.class.getResourceAsStream(inFileName);
            if (in == null) {
                System.err.println("Resource not found: " + inFileName);
                return null;
            }
            return StreamConverter.toByteArray(in);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing a read-only stream fails.
                }
            }
        }
    }

    /**
     * Command-line entry point. The last argument that is not a flag is the
     * input text. With {@code -gb} (the default) the text is passed to
     * {@link #big52gb(String)}; with {@code -big5} it is passed to
     * {@link #gb2big5(String)}.
     *
     * @param args {@code [-gb|-big5] inputstring}
     * @throws Exception if a required charset is unavailable
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.out.println("Usage: net.java2000.tools.GB2Big5 [-gb|-big5] inputstring");
            System.exit(1);
            return;
        }
        boolean bIsGB = true;
        String inStr = "";
        for (int i = 0; i < args.length; i++) {
            if (args[i].equalsIgnoreCase("-gb")) {
                bIsGB = true;
            } else if (args[i].equalsIgnoreCase("-big5")) {
                bIsGB = false;
            } else {
                inStr = args[i];
            }
        }
        GB2Big5 pTmp = GB2Big5.getInstance();
        if (pTmp == null) {
            // getInstance() has already printed the cause.
            System.err.println("Conversion tables could not be loaded");
            System.exit(1);
            return;
        }
        String outStr;
        if (bIsGB) {
            outStr = pTmp.big52gb(inStr);
        } else {
            outStr = new String(pTmp.gb2big5(inStr), "BIG5");
        }
        System.out.println("String [" + inStr + "] converted into:\n[" + outStr + "]");
    }
}

/** Helper for draining an input stream into memory. */
class StreamConverter {
    public StreamConverter() {
    }

    /**
     * Reads {@code input} until end of stream and returns everything read.
     * The stream is not closed.
     *
     * @param input the stream to drain
     * @return all bytes read; an empty array if the stream was already at its end
     * @throws IOException          if reading fails
     * @throws NullPointerException if {@code input} is {@code null}
     */
    public static byte[] toByteArray(InputStream input) throws IOException {
        ByteArrayOutputStream collected = new ByteArrayOutputStream(5000);
        byte[] chunk = new byte[5000];
        int count;
        while ((count = input.read(chunk)) != -1) {
            collected.write(chunk, 0, count);
        }
        return collected.toByteArray();
    }
}