首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > 编程 >

有趣的统计英文字母频次的例子

2013-03-01 
有趣的统计英文字母频率的例子:统计的是英文版《悲惨世界》,代码如下,使用 ASCII 值做数组下标直接计数。

有趣的统计英文字母频率的例子
统计的是英文版《悲惨世界》,代码如下,使用 ASCII 值做数组下标直接计数:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.Comparator;

/**
 * Counts how often each ASCII character with code 33..126 (everything
 * printable except the space) occurs in a text file, and prints the counts
 * sorted from most to least frequent.
 */
public class EnglishAlphaBetaStatics {

    /** Path of the text file that {@link #main(String[])} analyses. */
    public static final String EN_FOLDER = "C:/resources/Books/English/Les Miserables.txt";

    /** First character code that is counted; the space (32) is ignored. */
    private static final int ASCII_START = 33;

    /** Number of consecutive character codes counted, i.e. 33..126. */
    private static final int ASCII_LENGTH = 94;

    /** Occurrence counters; index {@code i} belongs to code {@code ASCII_START + i}. */
    private final int[] result = new int[ASCII_LENGTH];

    /** Number of counted characters over all files handled so far. */
    private int total = 0;

    /**
     * Reads one text file line by line and adds its character counts to the
     * running statistics. Characters outside the counted range are skipped.
     *
     * @param file the file to read
     * @throws NullPointerException if {@code file} is {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    public void handleOneFile(File file) throws IOException {
        if (file == null) {
            throw new NullPointerException("file");
        }
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader in = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = in.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    char c = line.charAt(i);
                    if (c >= ASCII_START && c < ASCII_START + ASCII_LENGTH) {
                        result[c - ASCII_START]++;
                        total++;
                    }
                }
            }
        }
    }

    /**
     * Prints the total and one line per counted character (character, count,
     * percentage of the total) to standard output, most frequent first.
     * Characters with equal counts are listed in ascending code order.
     *
     * <p>The counters themselves are not modified, so this method may be
     * called repeatedly and more files may be handled afterwards.
     */
    public void printResult() {
        // Sort a list of counter indexes rather than the counters, so that
        // result[i] keeps belonging to character ASCII_START + i.
        Integer[] order = new Integer[ASCII_LENGTH];
        for (int i = 0; i < order.length; i++) {
            order[i] = i;
        }
        // Arrays.sort on objects is stable, so ties stay in ascending code order.
        Arrays.sort(order, new Comparator<Integer>() {
            @Override
            public int compare(Integer left, Integer right) {
                return Integer.compare(result[right], result[left]);
            }
        });

        DecimalFormat df = new DecimalFormat("#.######");
        System.out.println("Total characters: " + total);
        System.out.println("Char\tNumber\t%");
        System.out.println("-----------------------------------");
        for (int i = 0; i < order.length; i++) {
            int index = order[i];
            char c = (char) (ASCII_START + index);
            // Nothing counted yet: report 0% instead of dividing zero by zero.
            double rate = (total == 0) ? 0.0 : result[index] * 100.0 / total;
            System.out.println(c + "\t" + result[index] + "\t" + df.format(rate) + "%");
        }
    }

    /**
     * Analyses the file named by {@link #EN_FOLDER} and prints the statistics.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        EnglishAlphaBetaStatics eab = new EnglishAlphaBetaStatics();
        eab.handleOneFile(new File(EN_FOLDER));
        eab.printResult();
    }
}

我以为会多慢呢,没想到瞬间完成,统计结果如下:
引用:
Total characters: 2800496
Char	Number	%
-----------------------------------
e	345891	12.351062%
t	235280	8.401369%
a	211330	7.546163%
o	190799	6.813043%
h	180302	6.438217%
n	176170	6.290671%
i	174552	6.232896%
s	166855	5.958052%
r	152896	5.459604%
d	113123	4.039392%
l	102527	3.66103%
u	70975	2.534372%
c	66061	2.358904%
m	59036	2.108055%
w	56513	2.017964%
f	56476	2.016643%
,	51714	1.846601%
g	48633	1.736585%
p	41978	1.498949%
y	39999	1.428283%
b	36215	1.293164%
.	32322	1.154153%
v	25400	0.906982%
k	14724	0.525764%
"	14616	0.521908%
T	12625	0.450813%
-	10222	0.365007%
I	9530	0.340297%
A	6890	0.246028%
H	6553	0.233994%
M	6435	0.229781%
;	6204	0.221532%
E	4313	0.154008%
S	4292	0.153259%
C	4262	0.152187%
x	3882	0.138618%
'	3852	0.137547%
!	3690	0.131762%
O	3593	0.128299%
j	3495	0.124799%
B	3476	0.124121%
W	3316	0.118408%
?	3094	0.11048%
R	3057	0.109159%
P	3047	0.108802%
F	2811	0.100375%
N	2751	0.098233%
:	2553	0.091162%
J	2536	0.090555%
q	2535	0.09052%
G	2401	0.085735%
L	2296	0.081985%
V	2021	0.072166%
z	1960	0.069988%
D	1735	0.061953%
Y	1147	0.040957%
U	768	0.027424%
1	687	0.024531%
K	600	0.021425%
8	422	0.015069%
*	303	0.01082%
X	262	0.009355%
2	243	0.008677%
3	237	0.008463%
`	227	0.008106%
5	203	0.007249%
[	197	0.007034%
]	197	0.007034%
0	195	0.006963%
4	155	0.005535%
7	142	0.005071%
6	133	0.004749%
Q	121	0.004321%
9	113	0.004035%
(	95	0.003392%
)	95	0.003392%
Z	58	0.002071%
_	40	0.001428%
|	36	0.001285%
{	2	0.000071%
}	2	0.000071%
+	1	0.000036%
/	1	0.000036%
#	0	0%
$	0	0%
%	0	0%
&	0	0%
<	0	0%
=	0	0%
>	0	0%
@	0	0%
\	0	0%
^	0	0%
~	0	0%

热点排行