1000字范文,内容丰富有趣,学习的好帮手!
1000字范文 > 繁简体(GB<=>Big5)字符串互转的JAVA方式实现

繁简体(GB<=>Big5)字符串互转的JAVA方式实现

时间:2022-09-13 01:27:44

相关推荐

繁简体(GB<=>Big5)字符串互转的JAVA方式实现

本文提供一个java实现中文字符繁简体互换的zip包以及主要的源代码实现说明。

繁简体(GB<=>Big5)中文字符的转换实现原理很简单:根据两种码表的编码规则,创建两者之间的字符对应关系表,程序读取这个映射表,查出每个双字节字符在另一种编码方式下对应的两个字节,并以字符为单位逐一替换(单字节字符保持不变)。

主功能实现的GB2Big5.java源代码如下:

查看复制到剪切板打印

package net.java2000.tools;

import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;

7.

8. /**

9. *用来处理GB2312/BIG5码字符互相转换的类.<br>

10.*需要两个码表文件:gb-big5.table,/zeal/util/big5-gb.table.<br>

11.*这两个码表可以根据具体情况补充映射不正确的码.

12.*/

13.publicclassGB2Big5{

14.privatestaticGB2Big5pInstance=null;

15.

16.privateStrings_big5TableFile=null;

17.

18.privateStrings_gbTableFile=null;

19.

20.privatebyte[]b_big5Table=null;

21.

22.privatebyte[]b_gbTable=null;

23.

24./**指定两个码表文件来进行初始化*/

25.privateGB2Big5(StringsgbTableFile,Stringsbig5TableFile)throwsNullPointerException{

26.s_big5TableFile=sbig5TableFile;

27.s_gbTableFile=sgbTableFile;

28.if(null==b_gbTable){

29.b_gbTable=getBytesFromFile(sgbTableFile);

30.}

31.if(null==b_big5Table){

32.b_big5Table=getBytesFromFile(sbig5TableFile);

33.}

34.if(null==b_gbTable){

35.thrownewNullPointerException("Nogbtablecanbeload");

36.}

37.if(null==b_big5Table){

38.thrownewNullPointerException("Nobig5tablecanbeload");

39.}

40.}

41.

42.publicstaticsynchronizedGB2Big5getInstance(){

43.//returngetInstance("d:\\gb-big5.table","d:\\big5-gb.table");

44.returngetInstance("/net/java2000/tools/gb-big5.table","/net/java2000/tools/big5-gb.table");

45.}

46.

47.publicstaticsynchronizedGB2Big5getInstance(StringsgbTableFile,Stringsbig5TableFile){

48.if(null==pInstance){

49.try{

50.pInstance=newGB2Big5(sgbTableFile,sbig5TableFile);

51.}catch(Exceptione){

52.System.err.println(e.toString());

53.pInstance=null;

54.}

55.}

56.returnpInstance;

57.}

58.

59./**

60.*把gbChar对应的big5字符替换掉,用来更新码表文件.一般当发现字符映射不正确的时候可以通过这个方法来校正.

61.*/

62.protectedsynchronizedvoidresetBig5Char(StringgbChar,Stringbig5Char)throwsException{

63.byte[]Text=newString(gbChar.getBytes(),"GBK").getBytes("GBK");

64.byte[]TextBig5=newString(big5Char.getBytes(),"BIG5").getBytes("BIG5");

65.intmax=Text.length-1;

66.inth=0;

67.intl=0;

68.intp=0;

69.intb=256;

70.for(inti=0;i<max;i++){

71.h=(int)(Text[i]);

72.if(h<0){

73.h=b+h;

74.l=(int)(Text[i+1]);

75.if(l<0){

76.l=b+(int)(Text[i+1]);

77.}

78.if(h==161&&l==64){

79.;//donothing

80.}else{

81.p=(h-160)*510+(l-1)*2;

82.b_gbTable[p]=TextBig5[i];

83.b_gbTable[p+1]=TextBig5[i+1];

84.}

85.i++;

86.}

87.}

88.BufferedOutputStreampWriter=newBufferedOutputStream(newFileOutputStream(s_gbTableFile));

89.pWriter.write(b_gbTable,0,b_gbTable.length);

90.pWriter.close();

91.}

92.

93./**

94.*把big5Char对应的gb字符替换掉,用来更新码表文件.一般当发现字符映射不正确的时候可以通过这个方法来校正.

95.*/

96.protectedsynchronizedvoidresetGbChar(Stringbig5Char,StringgbChar)throwsException{

97.byte[]TextGb=newString(gbChar.getBytes(),"GBK").getBytes("GBK");

98.byte[]Text=newString(big5Char.getBytes(),"BIG5").getBytes("BIG5");

99.intmax=Text.length-1;

100.inth=0;

101.intl=0;

102.intp=0;

103.intb=256;

104.for(inti=0;i<max;i++){

105. h=(int)(Text[i]);

106.if(h<0){

107. h=b+h;

108. l=(int)(Text[i+1]);

109.if(l<0){

110. l=b+(int)(Text[i+1]);

111. }

112.if(h==161&&l==64){

113. ;//donothing

114. }else{

115. p=(h-160)*510+(l-1)*2;

116. b_big5Table[p]=TextGb[i];

117. b_big5Table[p+1]=TextGb[i+1];

118. }

119. i++;

120. }

121. }

122. BufferedOutputStreampWriter=newBufferedOutputStream(newFileOutputStream(s_big5TableFile));

123. pWriter.write(b_big5Table,0,b_big5Table.length);

124. pWriter.close();

125. }

126.

127. /**把gb2312编码的字符串转化成big5码的字节流*/

128.publicbyte[]gb2big5(StringinStr)throwsException{

129.if(null==inStr||inStr.length()<=0){

130.return"".getBytes();

131. //return"";

132. }

133.byte[]Text=newString(inStr.getBytes(),"GBK").getBytes("GBK");

134.intmax=Text.length-1;

135.inth=0;

136.intl=0;

137.intp=0;

138.intb=256;

139.byte[]big=newbyte[2];

140.for(inti=0;i<max;i++){

141. h=(int)(Text[i]);

142.if(h<0){

143. h=b+h;

144. l=(int)(Text[i+1]);

145.if(l<0){

146. l=b+(int)(Text[i+1]);

147. }

148.if(h==161&&l==64){

149. big[0]=big[1]=(byte)(161-b);

150. }else{

151. p=(h-160)*510+(l-1)*2;

152.try{

153. big[0]=(byte)(b_gbTable[p]-b);

154. }catch(Exceptione){

155. big[0]=45;

156. }

157.try{

158. big[1]=(byte)(b_gbTable[p+1]-b);

159. }catch(Exceptione){

160. big[1]=45;

161. }

162. }

163. Text[i]=big[0];

164. Text[i+1]=big[1];

165. i++;

166. }

167. }

168.returnText;

169. //returnnewString(Text);

170. }

171.

172. /**把big5码的字符串转化成gb2312码的字符串*/

173.publicStringbig52gb(StringinStr)throwsException{

174.if(null==inStr||inStr.length()<=0){

175.return"";

176. }

177.byte[]Text=newString(inStr.getBytes(),"BIG5").getBytes("BIG5");

178.intmax=Text.length-1;

179.inth=0;

180.intl=0;

181.intp=0;

182.intb=256;

183.byte[]big=newbyte[2];

184.for(inti=0;i<max;i++){

185. h=(int)(Text[i]);

186.if(h<0){

187. h=b+h;

188. l=(int)(Text[i+1]);

189.if(l<0){

190. l=b+(int)(Text[i+1]);

191. }

192.if(h==161&&l==161){

193. big[0]=(byte)(161-b);

194. big[1]=(byte)(64-b);

195. }else{

196. p=(h-160)*510+(l-1)*2;

197.try{

198. big[0]=(byte)(b_big5Table[p]-b);

199. }catch(Exceptione){

200. big[0]=45;

201. }

202.try{

203. big[1]=(byte)(b_big5Table[p+1]-b);

204. }catch(Exceptione){

205. big[1]=45;

206. }

207. }

208. Text[i]=big[0];

209. Text[i+1]=big[1];

210. i++;

211. }

212. }

213.returnnewString(Text);

214. }

215.

216. /**把文件读入字节数组,读取失败则返回null*/

217.privatestaticbyte[]getBytesFromFile(StringinFileName){

218.try{

219. InputStreamin=GB2Big5.class.getResourceAsStream(inFileName);

220.byte[]sContent=StreamConverter.toByteArray(in);

221. in.close();

222.returnsContent;

223. /*

224. *java.io.RandomAccessFileinStream=newjava.io.RandomAccessFile(inFileName,"r");byte[]sContent=newbyte[(int)

225. *(inStream.length())];inStream.read(sContent);inStream.close();returnsContent;

226. */

227. }catch(Exceptione){

228. e.printStackTrace();

229.returnnull;

230. }

231. }

232.

233.publicstaticvoidmain(String[]args)throwsException{

234.if(args.length<2){

235. System.out.println("Usage:net.java2000.tools.GB2Big5[-gb|-big5]inputstring");

236. System.exit(1);

237.return;

238. }

239.booleanbIsGB=true;

240. StringinStr="";

241.for(inti=0;i<args.length;i++){

242.if(args[i].equalsIgnoreCase("-gb")){

243. bIsGB=true;

244. }elseif(args[i].equalsIgnoreCase("-big5")){

245. bIsGB=false;

246. }else{

247. inStr=args[i];

248. }

249. }

250. GB2Big5pTmp=GB2Big5.getInstance();

251. StringoutStr="";

252.if(bIsGB){

253. outStr=pTmp.big52gb(inStr);

254. }else{

255. outStr=newString(pTmp.gb2big5(inStr),"BIG5");

256. }

257. System.out.println("String["+inStr+"]convertedinto:\n["+outStr+"]");

258. }

259. }

260.

261.classStreamConverter{

262.publicStreamConverter(){

263. }

264.

265.publicstaticbyte[]toByteArray(InputStreaminput)throwsIOException{

266.intstatus=0;

267.inttotalBytesRead=0;

268.intblockCount=1;

269.bytedynamicBuffer[]=newbyte[5000*blockCount];

270.bytebuffer[]=newbyte[5000];

271.booleanendOfStream=false;

272.do{

273.if(endOfStream)

274.break;

275.intbytesRead=0;

276.if(input.available()!=0){

277. status=input.read(buffer);

278. endOfStream=status==-1;

279.if(!endOfStream)

280. bytesRead=status;

281. }else{

282. status=input.read();

283. endOfStream=status==-1;

284. buffer[0]=(byte)status;

285.if(!endOfStream)

286. bytesRead=1;

287. }

288.if(!endOfStream){

289.if(totalBytesRead+bytesRead>5000*blockCount){

290. blockCount++;

291.bytenewBuffer[]=newbyte[5000*blockCount];

292. System.arraycopy(dynamicBuffer,0,newBuffer,0,totalBytesRead);

293. dynamicBuffer=newBuffer;

294. }

295. System.arraycopy(buffer,0,dynamicBuffer,totalBytesRead,bytesRead);

296. totalBytesRead+=bytesRead;

297. }

298. }while(true);

299.byteresult[]=newbyte[totalBytesRead];

300.if(totalBytesRead!=0)

301. System.arraycopy(dynamicBuffer,0,result,0,totalBytesRead);

302.returnresult;

303. }

304. }

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。