首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 其他教程 > 其他相关 >

Java实现GBK编码文件繁简体转换

2013-01-26 
Java实现GBK编码文件繁简体转换:最近喜欢上高品质音乐了,但是cue很多抓下来是繁体中文,看上去略不爽。大陆的……

Java实现GBK编码文件繁简体转换

最近喜欢上高品质音乐了,但是很多抓下来的cue文件是繁体中文,看上去略不爽。大陆的Windows默认使用GBK编码,所以准备写个程序把这些文件转成简体。分三步。

第一步,下载GBK中文字库。放到WPS里进行繁简体转换,得到繁简对应关系,再分别保存到txt文件中,使用ANSI编码。

第二步,对字库进行预处理。包括两部分:去重,排序。

去重指把繁简体相同的字符去掉。

代码:


import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;

/**
 * Converts the Traditional Chinese characters of a GBK-encoded text file to Simplified Chinese.
 *
 * <p>The mapping is read from two one-line tables in the working directory,
 * {@code traditional.txt} and {@code simple.txt}: the character at index {@code i} of the first
 * maps to the character at index {@code i} of the second. Lookups use binary search, so the
 * traditional table must be sorted in ascending {@code char} order. The converted text is
 * written next to the input file as {@code simple<filename>}. All files are read and written
 * as GBK.
 *
 * @author GT
 * @since 2013.1.17
 */
public class Main {

    /** Charset name used for the two tables, the input file and the output file. */
    private static final String ENCODING = "GBK";

    /** Traditional characters, sorted ascending by char value; loaded by {@link #initial()}. */
    static String traditional = null;

    /** Simplified counterparts, index-aligned with {@link #traditional}. */
    static String simple = null;

    /**
     * Converts the single file named on the command line.
     *
     * @param args exactly one element: the path of the file to convert
     * @throws IOException if a table or the input cannot be read, or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.err.println("not enough files");
            return;
        }

        initial();

        // Prefix only the file name so that an argument containing a directory
        // ("dir/a.cue") is written to "dir/simplea.cue" rather than "simpledir/a.cue".
        File input = new File(args[0]);
        File output = new File(input.getParentFile(), "simple" + input.getName());

        // try-with-resources closes (and thereby flushes) both streams even on failure.
        try (BufferedReader br = openReader(input);
                BufferedWriter bw = openWriter(output)) {
            String line;
            while ((line = br.readLine()) != null) {
                simplify(line, bw);
                bw.newLine();
            }
        }
    }

    /**
     * Loads both mapping tables from the working directory.
     *
     * @throws IOException if a table is missing, unreadable or empty, or if the simplified table
     *     is shorter than the traditional one
     */
    private static void initial() throws IOException {
        String traditionalTable = readFirstLine(new File("traditional.txt"));
        String simpleTable = readFirstLine(new File("simple.txt"));

        // Every index found in the traditional table is used to index the simplified table,
        // so fail early instead of with an index error in the middle of a conversion.
        if (simpleTable.length() < traditionalTable.length()) {
            throw new IOException("simple.txt has " + simpleTable.length()
                    + " characters but traditional.txt has " + traditionalTable.length());
        }

        traditional = traditionalTable;
        simple = simpleTable;
    }

    /**
     * Writes {@code line} to {@code bw}, replacing each character found in the traditional table
     * by its simplified counterpart and copying every other character unchanged. No line
     * terminator is written.
     *
     * @param line the text to convert
     * @param bw the destination writer
     * @throws IOException if writing fails
     */
    private static void simplify(String line, BufferedWriter bw) throws IOException {
        for (int i = 0; i < line.length(); ++i) {
            char ch = line.charAt(i);
            int index = find(ch);
            bw.append(index != -1 ? simple.charAt(index) : ch);
        }
    }

    /**
     * Binary-searches the traditional table for {@code ch}.
     *
     * @param ch the character to look up
     * @return the index of {@code ch} in {@link #traditional}, or {@code -1} if it is absent
     */
    private static int find(char ch) {
        int low = 0;
        // Inclusive upper bound: the last valid index, not the length.
        int high = traditional.length() - 1;
        while (low <= high) {
            int mid = (low + high) >>> 1;
            char candidate = traditional.charAt(mid);
            if (candidate == ch) {
                return mid;
            }
            if (candidate < ch) {
                low = mid + 1;
            } else {
                high = mid - 1;
            }
        }
        return -1;
    }

    /** Returns the first line of {@code file}; throws {@link IOException} if the file is empty. */
    private static String readFirstLine(File file) throws IOException {
        try (BufferedReader br = openReader(file)) {
            String line = br.readLine();
            if (line == null) {
                throw new IOException(file.getPath() + " is empty");
            }
            return line;
        }
    }

    /** Opens {@code file} for buffered reading using {@link #ENCODING}. */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), ENCODING));
    }

    /** Opens {@code file} for buffered writing using {@link #ENCODING}, replacing any old content. */
    private static BufferedWriter openWriter(File file) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), ENCODING));
    }
}


热点排行