首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 开发语言 > 编程 >

哈工大编译原理第一回实验-词法分析(Java版本)

2012-11-03 
哈工大编译原理第一次实验--词法分析(Java版本)1.在判断空行的时候,java里面用 line 不好使,调试发

哈工大编译原理第一次实验--词法分析(Java版本)

1.在判断空行的时候,java里面用 line == "" 不好使,调试发现进不去if,然后用line.equals("")就好使。原因是 == 比较的是两个String对象的引用,而equals比较的才是字符串的内容。

2.java格式化输出,可以有:System.out.printf("%-10s\t<ERROR:标识符重复!>\n",token);这种写法!printf啊,但是它不能直接输出到文件中。不过我们可以这么写:

output.write(String.format("%-10s\t<%s,-->",token,token));  

String.format 救了我们哦~~

3.输出到文件中怎么换行呢? output.write("空行~\r\n"); 呵呵,win下是\r\n哦,linux下\n。。。

4.传说中的符号表的C语言代码实现:http://blog.163.com/ppt_compiler/blog/static/20281300720125120041966/

===================================================================================

如何解读这个看起来很糟糕的基本没啥注释的代码呢?

1.看清楚结构,结构如下:

(1)读入一行line,把line转成char[] 的strLine数组,然后每次处理一个字符ch(看红色代码,所有的处理都在for里面)。

        (2)然后对每个ch进行分类:if else if else if 。。。建议每次看一个if{}就不会头晕啦

2.看清楚算法,这个是基于很精巧的“状态转移图”的程序,我拿个数字处理的代码讲解下:

哈工大编译原理第一回实验-词法分析(Java版本)

那么我们就建立个二维数组来实现这个状态的转移:

   123456

 1 d.#e##

 2 ##d###

 3 ##de##

 4 ####-d

 5 #####d

 6 #####d

我们忽略0状态,因为我们已经进入了。

状态1到状态1有矢量连接,所以数组d[1][1] = 'd'

状态1到状态2有矢量连接,所以数组d[1][2] = '.'

依次类推,没有矢量的就标为'#',然后关键代码如下:

package ouyang;import java.io.*;import java.util.*;public class AnalysisCodeToWord {public static void main(String args[]) {String infile = "code.txt";String outfile = "out.txt";try {FileInputStream f = new FileInputStream(infile);BufferedReader dr = new BufferedReader(new InputStreamReader(f));BufferedWriter output = new BufferedWriter(new FileWriter(outfile));String line = "";int cnt = 0;while ((line = dr.readLine()) != null) {cnt++;if (cnt == 1) {System.out.println("line : " + cnt);output.write(String.format("line : %d\r\n", cnt));} else {System.out.println("\n\nline : " + cnt);output.write(String.format("\r\n\r\nline : %d\r\n", cnt));}if (line.equals("")) {System.out.println("空行~");output.write("空行~\r\n");} else {char[] strLine = line.toCharArray();for (int i = 0; i < strLine.length; i++) {char ch = strLine[i];String token = "";if (isAlpha(ch)) // 判断关键字和标识符{do {token += ch;i++;if(i>=strLine.length) break;ch = strLine[i];} while (ch != '\0' && (isAlpha(ch) || isDigit(ch)));--i; // 指针回退if (isMatchKeyword(token.toString())) // 是关键字{System.out.printf("%-10s\t<%s,-->\n", token,token);output.write(String.format("%-10s\t<%s,-->\r\n", token, token));} else // 是标识符{if (symbol.isEmpty()|| (!symbol.isEmpty() && !symbol.containsKey(token))) {symbol.put(token, symbol_pos);System.out.printf("%-10s\t<标识符,(%s,入口:%d)>\n", token,token, symbol_pos);output.write(String.format("%-10s\t<标识符,(%s,入口:%d)>\r\n",token, token, symbol_pos));symbol_pos++;} else {System.out.printf("%-10s\t<ERROR:标识符重复!>\n", token);output.write(String.format("%-10s\t<ERROR:标识符重复!>\r\n",token));}}token = "";} else if (isDigit(ch)) // 判断数字常量{int s = 1;Boolean isfloat = false;while (ch != '\0'&& (isDigit(ch) || ch == '.' || ch == 'e' || ch == '-')) {if (ch == '.' 
|| ch == 'e')isfloat = true;int k;for (k = 1; k <= 6; k++) {char tmpstr[] = digitDFA[s].toCharArray();if (ch != '#'&& 1 == in_digitDFA(ch, tmpstr[k])) {token += ch;s = k;break;}}if (k > 6)break;i++;if(i>=strLine.length) break;ch = strLine[i];}// if(ch) --i; // 指针回退Boolean haveMistake = false;if (s == 2 || s == 4 || s == 5) {haveMistake = true;} else // 1,3,6{if (!isOp(ch) || ch == '.')haveMistake = true;}if (haveMistake) // 错误处理{while (ch != '\0' && ch != ',' && ch != ';'&& ch != ' ') // 一直到“可分割”的字符结束{token += ch;i++;if(i>=strLine.length) break;ch = strLine[i];}System.out.printf("%-10s\tERROR:请确保实常数输入正确\n",token);output.write(String.format("%-10s\tERROR:请确保实常数输入正确!\r\n", token));} else {if (isfloat) {System.out.printf("%-10s\t<实型常量,%s>\n",token, token);output.write(String.format("%-10s\t<实型常量,%s>\r\n", token,token));} else {System.out.printf("%-10s\t<实型常量,%s>\n",token, token);output.write(String.format("%-10s\t<整型常量,%s>\r\n", token,token));}}--i;token = "";} else if (ch == '\'') // 识别字符常量,类似处理字符串常量。{int s = 0;Boolean haveMistake = false;String token1 = "";token1 += ch;while (s != 3) {i++;if(i>=strLine.length) break;ch = strLine[i];if (ch == '\0') {haveMistake = true;break;}for (int k = 0; k < 4; k++) {char tmpstr[] = stConDFA[s].toCharArray();if (in_sinStConDFA(ch, tmpstr[k])) {token1 += ch; // 为输出if (k == 2 && s == 1) {if (isEsSt(ch)) // 是转义字符token = token + '\\' + ch;elsetoken += ch;} else if (k != 3 && k != 1)token += ch;s = k;break;}}}if (haveMistake) {System.out.printf("%s\tERROR:字符常量引号不封闭\n",token1);output.write(String.format("%s\tERROR:字符常量引号不封闭\r\n", token1));--i;} else {if (token.length() == 1) {System.out.printf("%-10s\t<字符常量,%s>\n",token1, token);output.write(String.format("%-10s\t<字符常量,%s>\r\n", token1,token));} else if (token.length() == 2) {if (isEsSt(token.charAt(1))&& token.charAt(0) == '\\') {System.out.printf("%-10s\t<字符常量,%s>\n",token1, token);output.write(String.format("%-10s\t<字符常量,%s>\r\n", token1,token));}}}token = "";} else if (ch == '"') // 
处理字符串常量的{String token1 = "";token1 += ch;int s = 0;Boolean haveMistake = false;while (s != 3 ) {i++;if(i>=strLine.length-1) {haveMistake = true;break;}ch = strLine[i];if (ch == '\0') {haveMistake = true;break;}for (int k = 0; k < 4; k++) {char tmpstr[] = stConDFA[s].toCharArray();if (in_stConDFA(ch, tmpstr[k])) {token1 += ch;if (k == 2 && s == 1) {if (isEsSt(ch)) // 是转义字符token = token + '\\' + ch;elsetoken += ch;} else if (k != 3 && k != 1)token += ch;s = k;break;}}}if (haveMistake) {System.out.printf("%-10s\tERROR:字符串常量引号不封闭\n",token1);output.write(String.format("%-10s\tERROR:字符串常量引号不封闭\n", token1));--i;} else {System.out.printf("%-10s\t<字符串常量,%s>\n",token1, token);output.write(String.format("%-10s\t<字符串常量,%s>\r\n",token1, token));}token = "";} else if (isOp(ch)) // 运算符,界符{token += ch;if (isPlusEqu(ch)) // 后面可以用一个"="{i++;if(i>=strLine.length) break;ch = strLine[i];if (ch == '=')token += ch;else {if (isPlusSame(strLine[i - 1])&& ch == strLine[i - 1])token += ch; // 后面可以用一个和自己一样的else {--i;}}}System.out.printf("%-10s\t<%s,-->\n", token, token);output.write(String.format("%-10s\t<%s,-->\r\n",token, token));token = "";} else if (ch == '/') // 注释+除号: 注释只要识别出来就好。{token += ch;i++;if(i>=strLine.length) break;ch = strLine[i];if (ch != '*' && ch != '/') // 除号处理{if (ch == '=')token += ch; // /=else {--i; // 指针回退 // /}System.out.printf("%-10s\t<%s,-->\n", token,token);output.write(String.format("%-10s\t<%s,-->\n",token, token));token = "";} else // 注释可能是‘//’也可能是‘/*’{Boolean haveMistake = false;if (ch == '*') {token += ch; // ch == '*'int s = 2;while (s != 4) {i++;if(i>=strLine.length) break;ch = strLine[i]; // 注意判断溢出!if (ch == '\0') {haveMistake = true;break;}for (int k = 2; k <= 4; k++) {char tmpstr[] = noteDFA[s].toCharArray();if (1 == in_noteDFA(ch, tmpstr[k],s)) {token += ch;s = k;break;}}}}else if(ch == '/') //这里就不用状态转移了...{int index = line.lastIndexOf("//");String tmpstr=line.substring(index);int tmpint = tmpstr.length();for(int k=0;k<tmpint;k++) {i++;}token = 
tmpstr;}System.out.printf("%-10s\t", token);output.write(String.format("%-10s\t", token));if (haveMistake) {System.out.printf("ERROR:注释没有封闭\n");output.write("ERROR:注释没有封闭\r\n");--i;} else {System.out.printf("(注释:%s)\n", token);output.write(String.format("(注释:%s)\n",token));}token = "";}}    else // 一些很奇怪的字符            {                if(ch != ' ' && ch != '\t')                {                System.out.printf("%-10c ERROR:存在不合法字符\n",ch);                output.write(String.format("%-10c ERROR:存在不合法字符\n",ch));                }            }}}}f.close();dr.close();output.close();} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}public static Boolean isAlpha(char ch) {return ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_');}public static Boolean isDigit(char ch) {return (ch >= '0' && ch <= '9');}public static Boolean isMatchKeyword(String str) {Boolean flag = false;for (int i = 0; i < 32; i++) {if (str.equals(keywords[i])) {flag = true;break;}}return flag;}public static Boolean isOp(char ch) // 判断是否是运算符{for (int i = 0; i < 22; i++)if (ch == oper[i]) {return true;}return false;}public static int in_digitDFA(char ch, char dD) {if (dD == 'd') {if (isDigit(ch))return 1;elsereturn 0;}return (ch == dD) ? 
1 : 0;}public static Boolean in_stConDFA(char ch, char key) {if (key == 'a')return true;if (key == '\\')return ch == key;if (key == '"')return ch == key;if (key == 'd')return ch != '\\' && ch != '"';return false;}public static Boolean in_sinStConDFA(char ch, char key) {if (key == 'a')return true;if (key == '\\')return ch == key;if (key == '"')return ch == '\'';if (key == 'd')return ch != '\\' && ch != '\'';return false;}public static Boolean isPlusEqu(char ch) // 运算符后可加等于{return ch == '+' || ch == '-' || ch == '*' || ch == '/' || ch == '='|| ch == '>' || ch == '<' || ch == '&' || ch == '|'|| ch == '^';}public static Boolean isPlusSame(char ch) // 可以连续两个运算符一样{return ch == '+' || ch == '-' || ch == '&' || ch == '|';}public static Boolean isEsSt(char ch) {return ch == 'a' || ch == 'b' || ch == 'f' || ch == 'n' || ch == 'r'|| ch == 't' || ch == 'v' || ch == '?' || ch == '0';}public static int in_noteDFA(char ch, char nD, int s) {if (s == 2) {if (nD == 'c') {if (ch != '*')return 1;elsereturn 0;}}if (s == 3) {if (nD == 'c') {if (ch != '*' && ch != '/')return 1;elsereturn 0;}}return (ch == nD) ? 1 : 0;}public static String code = "";public static Map<String, Integer> symbol = new HashMap<String, Integer>();// =new// HashMap<String,int>;public static int symbol_pos = 0;// 32个public static String keywords[] = { "auto", "double", "int", "struct","break", "else", "long", "switch", "case", "enum", "register","typedef", "char", "extern", "return", "union", "const", "float","short", "unsigned", "continue", "for", "signed", "void","default", "goto", "sizeof", "volatile", "do", "if", "while","static" };// 7个public static String digitDFA[] = { "#", "#d.#e##", "###d###", "###de##","#####-d", "######d", "######d" };// 22个public static char oper[] = { '+', '-', '*', '=', '<', '>', '&', '|', '~','^', '!', '(', ')', '[', ']', '{', '}', '%', ';', ',', '#', '.' 
};// 4个public static String stConDFA[] = { "#\\d#", "##a#", "#\\d\"", "####" };// 4个public static String noteDFA[] = { "#", "##*##", "##c*#", "##c*/", "#####" };}






热点排行