字符编码工具类

2012-10-15
字符编码工具类字符编码工具类package charToolsimport java.io.UnsupportedEncodingExceptionimport ja
字符编码工具类

字符编码工具类
package charTools;import java.io.UnsupportedEncodingException;import java.net.URLEncoder;import java.net.URLDecoder;import java.security.*;import java.text.*;import java.util.*;/** * Title:字符编码工具类 * Description: * Copyright: flashman.com.cn Copyright (c) 2005 * Company: flashman.com.cn * @author: jeffzhu * @version 1.0 *//* *===================功能说明==================================== * *================================字符操作======================= *String chopAtWord(String string, int length)   从字符串第一位开始取n位字符 * * * *=================================编码转换====================== *String ISO2GB(String text)           转换编码 ISO-8859-1到GB2312 *String GB2ISO(String text)           转换编码 GB2312到ISO-8859-1 *String Utf8URLencode(String text)    Utf8URL编码 *String Utf8URLdecode(String text)    Utf8URL解码 *String CodeToWord(String text）       utf8URL编码转字符 *boolean Utf8codeCheck(String text)   编码是否有效 *boolean   isUtf8Url(String text)     是否Utf8Url编码 * * *==================================加密解密====================== *synchronized static final String hash(String data)    MessageDigest加密 *String encodeBase64(String data)                      base64加密 *public static String decodeBase64(String data)        base64解密 * * */public class charTools1 {/** * 转换编码 ISO-8859-1到GB2312 * @param text * @return */  public String ISO2GB(String text) {    String result = "";    try {      result = new String(text.getBytes("ISO-8859-1"), "GB2312");    }    catch (UnsupportedEncodingException ex) {      result = ex.toString();    }    return result;  }  /**   * 转换编码 GB2312到ISO-8859-1   * @param text   * @return   */  public String GB2ISO(String text) {    String result = "";    try {      result = new String(text.getBytes("GB2312"), "ISO-8859-1");    }    catch (UnsupportedEncodingException ex) {      ex.printStackTrace();    }    return result;  }  /**   * Utf8URL编码   * @param s   * @return   */  public String Utf8URLencode(String text) {    StringBuffer result = new StringBuffer();    for (int i = 0; i < 
text.length(); i++) {      char c = text.charAt(i);      if (c >= 0 && c <= 255) {        result.append(c);      }else {        byte[] b = new byte[0];        try {          b = Character.toString(c).getBytes("UTF-8");        }catch (Exception ex) {        }        for (int j = 0; j < b.length; j++) {          int k = b[j];          if (k < 0) k += 256;          result.append("%" + Integer.toHexString(k).toUpperCase());        }      }    }    return result.toString();  }  /**   * Utf8URL解码   * @param text   * @return   */  public String Utf8URLdecode(String text) {    String result = "";    int p = 0;    if (text!=null && text.length()>0){      text = text.toLowerCase();      p = text.indexOf("%e");      if (p == -1) return text;      while (p != -1) {        result += text.substring(0, p);        text = text.substring(p, text.length());        if (text == "" || text.length() < 9) return result;        result += CodeToWord(text.substring(0, 9));        text = text.substring(9, text.length());        p = text.indexOf("%e");      }    }    return result + text;  }  /**   * utf8URL编码转字符   * @param text   * @return   */  private String CodeToWord(String text) {    String result;    if (Utf8codeCheck(text)) {      byte[] code = new byte[3];      code[0] = (byte) (Integer.parseInt(text.substring(1, 3), 16) - 256);      code[1] = (byte) (Integer.parseInt(text.substring(4, 6), 16) - 256);      code[2] = (byte) (Integer.parseInt(text.substring(7, 9), 16) - 256);      try {        result = new String(code, "UTF-8");      }catch (UnsupportedEncodingException ex) {        result = null;      }    }    else {      result = text;    }    return result;  }  public static boolean isValidUtf8(byte[] b, int aMaxCount) {    int lLen = b.length, lCharCount = 0;    for (int i = 0; i < lLen && lCharCount < aMaxCount; ++lCharCount) {      byte lByte = b[i++]; //to fast operation, ++ now, ready for the following for(;;)      if (lByte >= 0) continue; //>=0 is normal ascii      if (lByte 
< (byte) 0xc0 || lByte > (byte) 0xfd)        return false;      int lCount = lByte > (byte) 0xfc ? 5 : lByte > (byte) 0xf8 ? 4 : lByte > (byte) 0xf0 ? 3 : lByte > (byte) 0xe0 ? 2 : 1;      if (i + lCount > lLen) return false;      for (int j = 0; j < lCount; ++j, ++i)        if (b[i] >= (byte) 0xc0)return false;    }    return true;  }  /**   * 编码是否有效   * @param text   * @return   */  private boolean Utf8codeCheck(String text){    String sign = "";    if (text.startsWith("%e"))      for (int i = 0, p = 0; p != -1; i++) {        p = text.indexOf("%", p);        if (p != -1)          p++;        sign += p;      }    return sign.equals("147-1");  }  /**   * 是否Utf8Url编码   * @param text   * @return   */  public boolean isUtf8Url(String text) {    text = text.toLowerCase();    int p = text.indexOf("%");    if (p != -1 && text.length() - p > 9) {      text = text.substring(p, p + 9);    }    return Utf8codeCheck(text);  }  /* * ======================================加密解密================================================= */      /**     * Used by the hash method.     */    private static MessageDigest digest = null;    /** * 将传入的参数转化为它所对应的hash码     * @param data the String to compute the hash of.     * @return a hashed version of the passed-in String     */    public synchronized static final String hash(String data) {        if (digest == null) {            try {                digest = MessageDigest.getInstance("MD5");            }            catch (NoSuchAlgorithmException nsae) {                System.err.println("Failed to load the MD5 MessageDigest. " +                "Jive will be unable to function normally.");                nsae.printStackTrace();            }        }        // Now, compute hash.        digest.update(data.getBytes());        return encodeHex(digest.digest());    }    /** * 将传入的byte型数组转化为对应的十六进制，并组合成字符串输出     * Turns an array of bytes into a String representing each byte as an     * unsigned hex number.     
* <p>     * Method by Santeri Paavolainen, Helsinki Finland 1996<br>     * (c) Santeri Paavolainen, Helsinki Finland 1996<br>     * Distributed under LGPL.     *     * @param bytes an array of bytes to convert to a hex-string     * @return generated hex string     */    public static final String encodeHex(byte[] bytes) {        StringBuffer buf = new StringBuffer(bytes.length * 2);        int i;        for (i = 0; i < bytes.length; i++) {            if (((int) bytes[i] & 0xff) < 0x10) {                buf.append("0");            }            buf.append(Long.toString((int) bytes[i] & 0xff, 16));        }        return buf.toString();    }    /** * 将十六进制数字型的字符串转化为byte型的数组，将字符串按两位两位分开     * Turns a hex encoded string into a byte array. It is specifically meant     * to "reverse" the toHex(byte[]) method.         * @param hex a hex encoded String to transform into a byte array.     * @return a byte array representing the hex String[     */    public static final byte[] decodeHex(String hex) {        char [] chars = hex.toCharArray();        byte[] bytes = new byte[chars.length/2];        int byteCount = 0;        for (int i=0; i<chars.length; i+=2) {            byte newByte = 0x00;            newByte |= hexCharToByte(chars[i]);            newByte <<= 4;            newByte |= hexCharToByte(chars[i+1]);            bytes[byteCount] = newByte;            byteCount++;        }        return bytes;    }    /**     * Returns the the byte value of a hexadecmical char (0-f). It's assumed     * that the hexidecimal chars are lower case as appropriate.     
*     * @param ch a hexedicmal character (0-f)     * @return the byte value of the character (0x00-0x0F)     */    private static final byte hexCharToByte(char ch) {        switch(ch) {            case '0': return 0x00;            case '1': return 0x01;            case '2': return 0x02;            case '3': return 0x03;            case '4': return 0x04;            case '5': return 0x05;            case '6': return 0x06;            case '7': return 0x07;            case '8': return 0x08;            case '9': return 0x09;            case 'a': return 0x0A;            case 'b': return 0x0B;            case 'c': return 0x0C;            case 'd': return 0x0D;            case 'e': return 0x0E;            case 'f': return 0x0F;        }        return 0x00;    }        //*********************************************************************    //* Base64 - a simple base64 encoder and decoder.    //*    //*     Copyright (c) 1999, Bob Withers - bwit@pobox.com    //*    //* This code may be freely used for any purpose, either personal    //* or commercial, provided the authors copyright notice remains    //* intact.    //*********************************************************************    /**base64加密     * Encodes a String as a base64 String.     *     * @param data a String to encode.     * @return a base64 encoded String.     */    public static String encodeBase64(String data) {        return encodeBase64(data.getBytes());    }    /**     * Encodes a byte array into a base64 String.     *     * @param data a byte array to encode.     * @return a base64 encode String.     
*/    public static String encodeBase64(byte[] data) {        int c;        int len = data.length;        StringBuffer ret = new StringBuffer(((len / 3) + 1) * 4);        for (int i = 0; i < len; ++i) {            c = (data[i] >> 2) & 0x3f;            ret.append(cvt.charAt(c));            c = (data[i] << 4) & 0x3f;            if (++i < len)                c |= (data[i] >> 4) & 0x0f;            ret.append(cvt.charAt(c));            if (i < len) {                c = (data[i] << 2) & 0x3f;                if (++i < len)                    c |= (data[i] >> 6) & 0x03;                ret.append(cvt.charAt(c));            }            else {                ++i;                ret.append((char) fillchar);            }            if (i < len) {                c = data[i] & 0x3f;                ret.append(cvt.charAt(c));            }            else {                ret.append((char) fillchar);            }        }        return ret.toString();    }    /**base64解密     * Decodes a base64 String.     *     * @param data a base64 encoded String to decode.     * @return the decoded String.     */    public static String decodeBase64(String data) {        return decodeBase64(data.getBytes());    }    /**     * Decodes a base64 aray of bytes.     *     * @param data a base64 encode byte array to decode.     * @return the decoded String.     
*/    public static String decodeBase64(byte[] data) {        int c, c1;        int len = data.length;        StringBuffer ret = new StringBuffer((len * 3) / 4);        for (int i = 0; i < len; ++i) {            c = cvt.indexOf(data[i]);            ++i;            c1 = cvt.indexOf(data[i]);            c = ((c << 2) | ((c1 >> 4) & 0x3));            ret.append((char) c);            if (++i < len) {                c = data[i];                if (fillchar == c)                    break;                c = cvt.indexOf((char) c);                c1 = ((c1 << 4) & 0xf0) | ((c >> 2) & 0xf);                ret.append((char) c1);            }            if (++i < len) {                c1 = data[i];                if (fillchar == c1)                    break;                c1 = cvt.indexOf((char) c1);                c = ((c << 6) & 0xc0) | c1;                ret.append((char) c);            }        }        return ret.toString();    }    private static final int fillchar = '=';    private static final String cvt = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"                                    + "abcdefghijklmnopqrstuvwxyz"                                    + "0123456789+/";                /**     * 从字符串中取n位字符     *     * @param string the String to chop.     * @param length the index in <code>string</code> to start looking for a     *       whitespace boundary at.     * @return a substring of <code>string</code> whose length is less than or     *       equal to <code>length</code>, and that is chopped at whitespace.     */     public static final String chopAtWord(String string, int length) {         if (string == null) {             return string;         }         char [] charArray = string.toCharArray();         int sLength = string.length();         if (length < sLength) {             sLength = length;         }         // First check if there is a newline character before length; if so,         // chop word there.         
for (int i=0; i<sLength-1; i++) {             // Windows             if (charArray[i] == '\r' && charArray[i+1] == '\n') {                 return string.substring(0, i+1);             }             // Unix             else if (charArray[i] == '\n') {                 return string.substring(0, i);             }         }         // Also check boundary case of Unix newline         if (charArray[sLength-1] == '\n') {             return string.substring(0, sLength-1);         }         // Done checking for newline, now see if the total string is less than         // the specified chop point.         if (string.length() < length) {             return string;         }         // No newline, so chop at the first whitespace.         for (int i = length-1; i > 0; i--) {             if (charArray[i] == ' ') {                 return string.substring(0, i).trim();             }         }         // Did not find word boundary so return original String chopped at         // specified length.         return string.substring(0, length);     }      /**   * 测试   * @param args   */  public static void main(String[] args) {           //实例化工具类    charTools1 charTools = new charTools1();    String url;    //定义一个URL字符串    url="http://www.baidu.com/s?bs=indexof+%D3%C3%B7%A8&f=8&wd=java+%D7%D6%B7%FB%B4%A6%C0%ED%B9%A4%BE%DF%C0%E0";//    url = "http://www.google.com/search?hl=zh-CN&newwindow=1&q=%E4%B8%AD%E5%9B%BD%E5%A4%A7%E7%99%BE%E7%A7%91%E5%9C%A8%E7%BA%BF%E5%85%A8%E6%96%87%E6%A3%80%E7%B4%A2&btnG=%E6%90%9C%E7%B4%A2&lr=";    //调用方法isUtf8Url，进行判断是否Utf8Url编码    if(charTools.isUtf8Url(url)){    //如果是Utf8Url编码则调用Utf8URLdecode进行解码      System.out.println(charTools.Utf8URLdecode(url));    }else{      System.out.println(URLDecoder.decode(url));    }    url = "http://www.baidu.com/baidu?word=%D6%D0%B9%FA%B4%F3%B0%D9%BF%C6%D4%DA%CF%DF%C8%AB%CE%C4%BC%EC%CB%F7&tn=myie2dg";    if(charTools.isUtf8Url(url)){      System.out.println(charTools.Utf8URLdecode(url));    }else{      
System.out.println(URLDecoder.decode(url));    }        String charT="刘奇庭是不错的人";    String ct=charTools1.chopAtWord(charT, 5);    System.out.println(ct);  }}
热点排行
编程

字符编码工具类