有趣的统计英文字母频率的例子
统计对象是英文版《悲惨世界》(Les Misérables)。代码如下:以字符的 ASCII 码减去起始值 33 作为数组下标,直接对相应的计数器累加(空格及其他不可打印字符不计入):
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Objects;

/**
 * Counts how often each ASCII character with code 33 through 126 occurs in a
 * text file and prints the counts, most frequent first, to standard output.
 *
 * <p>Counts accumulate across calls to {@link #handleOneFile(File)}; printing
 * never modifies them, so files may be added and results printed in any order.
 */
public class EnglishAlphaBetaStatics {

    /** Path of the text file analysed by default (despite its name, this is a file path). */
    public static final String EN_FOLDER = "C:/resources/Books/English/Les Miserables.txt";

    /** First ASCII code that is counted; the space (32) and everything below it is ignored. */
    private static final int ASCII_START = 33;

    /** Number of consecutive ASCII codes that are counted, i.e. codes 33 through 126. */
    private static final int ASCII_LENGTH = 94;

    /** Occurrence count per character; slot {@code i} belongs to code {@code ASCII_START + i}. */
    private final int[] result = new int[ASCII_LENGTH];

    /** Number of counted characters over all files handled so far. */
    private int total = 0;

    /**
     * Reads one text file and adds its character counts to the running totals.
     * Characters outside the counted ASCII range are skipped.
     *
     * @param file the file to read
     * @throws NullPointerException if {@code file} is {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public void handleOneFile(File file) throws IOException {
        Objects.requireNonNull(file, "file");
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = in.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    int slot = line.charAt(i) - ASCII_START;
                    if (slot >= 0 && slot < ASCII_LENGTH) {
                        result[slot]++;
                        total++;
                    }
                }
            }
        }
    }

    /**
     * Prints the total, a header, and one line per counted character
     * (character, count, percentage of the total), ordered by descending count.
     * Characters with equal counts appear in ascending ASCII order.
     */
    public void printResult() {
        Integer[] order = sortedSlots();

        DecimalFormat df = new DecimalFormat("#.######");
        System.out.println("Total characters: " + total);
        System.out.println("Char\tNumber\t%");
        System.out.println("-----------------------------------");
        for (int slot : order) {
            char c = (char) (ASCII_START + slot);
            // With nothing counted yet the division would yield NaN; report 0 instead.
            double rate = total == 0 ? 0.0 : result[slot] * 100.0 / total;
            System.out.println(c + "\t" + result[slot] + "\t" + df.format(rate) + "%");
        }
    }

    /**
     * Returns the slot indices of {@link #result} ordered by descending count.
     * The counts themselves are left untouched so later calls still see which
     * count belongs to which character.
     */
    private Integer[] sortedSlots() {
        Integer[] order = new Integer[ASCII_LENGTH];
        for (int i = 0; i < order.length; i++) {
            order[i] = i;
        }
        // Arrays.sort on an object array is stable, so ties keep ascending ASCII order.
        Arrays.sort(order, (a, b) -> Integer.compare(result[b], result[a]));
        return order;
    }

    /**
     * Analyses one file and prints its character statistics.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise {@link #EN_FOLDER} is used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : EN_FOLDER;
        EnglishAlphaBetaStatics eab = new EnglishAlphaBetaStatics();
        eab.handleOneFile(new File(path));
        eab.printResult();
    }
}