首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > 编程 >

java获取文件编码格式

2012-11-01 
java获取文件编码格式：public static String get_charset( File file ) { String charset = "GBK"; ……

java获取文件编码格式

public static String get_charset( File file ) {
??????? String charset = "GBK";
??????? byte[] first3Bytes = new byte[3];
??????? try {
??????????? boolean checked = false;
??????????? BufferedInputStream bis = new BufferedInputStream( new FileInputStream( file ) );
??????????? bis.mark( 0 );
??????????? int read = bis.read( first3Bytes, 0, 3 );
??????????? if ( read == -1 ) return charset;
??????????? if ( first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE ) {
??????????????? charset = "UTF-16LE";
??????????????? checked = true;
??????????? }else if ( first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF ) {
??????????????? charset = "UTF-16BE";
??????????????? checked = true;
??????????? }else if ( first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB && first3Bytes[2] == (byte) 0xBF ) {
??????????????? charset = "UTF-8";
??????????????? checked = true;
??????????? }
??????????? bis.reset();
??????????? if ( !checked ) {
??????????? //??? int len = 0;
??????????????? int loc = 0;
??????????????? while ( (read = bis.read()) != -1 ) {
??????????????????? loc++;
??????????????????? if ( read >= 0xF0 ) break;
??????????????????? if ( 0x80 <= read && read <= 0xBF ) // 单独出现BF以下的,也算是GBK
??????????????????? break;
??????????????????? if ( 0xC0 <= read && read <= 0xDF ) {
??????????????????????? read = bis.read();
??????????????????????? if ( 0x80 <= read && read <= 0xBF ) // 双字节 (0xC0 - 0xDF) (0x80
??????????????????????????????????????????????????????????????????????? // - 0xBF),也可能在GB编码内??
??????????????????????? continue;
??????????????????????? else break;
??????????????????? }
??????????????????? else if ( 0xE0 <= read && read <= 0xEF ) {// 也有可能出错,但是几率较小
??????????????????????? read = bis.read();
??????????????????????? if ( 0x80 <= read && read <= 0xBF ) {
??????????????????????????? read = bis.read();
??????????????????????????? if ( 0x80 <= read && read <= 0xBF ) {
??????????????????????????????? charset = "UTF-8";
??????????????????????????????? break;
??????????????????????????? }
??????????????????????????? else break;
??????????????????????? }
??????????????????????? else break;
??????????????????? }
??????????????? }
??????????????? //System.out.println( loc + " " + Integer.toHexString( read ) );
??????????? }
??????????? bis.close();
??????? } catch ( Exception e ) {
??????????? e.printStackTrace();
??????? }
??????? return charset;
??? }

热点排行