使用有限状态自动机实现C语言的声明解析器
摘要:在很多的游戏编程中,我们使用了有限状态自动机作为模型。有限状态自动机作为变成模型,具有通用性好,方便理解的特点。本文主要结合前一个系列的两篇文章(1)C语言声明解析器的实现和(2)用C语言实现有限状态自动机来说明如何用有限状态自动机模型实现一个C语言的声明解析器。
typedef struct{ state current; trasition trasitions[STATENUM][CONDITIONS];} StateMachine, * pStateMachine;我们在解析一个C语言声明的时候,大致过程是这样的:
1)读取声明到第一个变量名称标识符name,此为状态S0
2)读取name后面的下一个标识符,确定类型并采取相应的动作:
“[”:这是一个数组,我们要处理这个数组,然后进入状态S1
“(”:这是一个函数,我们要处理这个函数,然后进入状态S2
“)”:它的前面是一个指针,我们要处理这个指针,然后进入状态S3
“/0”:声明读取完毕,可以结束了,进入状态S4
”other“:出错
这个状态机如下:

注意:S1,S2,S3,S4的状态图没有从图中体现出来,实际上,S1~S4的出边和S0是几乎一样的,所不同的就是S1没有经过function到达S2的出边,因为数组的元素不能是函数(可以是函数指针)。这一部分可以参考前面的C语言声明规则。
S0:读完声明到第一个变量标识符
S1:读完一个数组
S2:读完后面的一个函数
S3:读完前面的一个指针
S4:解析完毕
我们仅仅谈与自动机相关的部分,其他的部分可以在 另外一篇文章 一个C语言声明解析器的设计与实现里找到。
#include<stdio.h>#include<ctype.h>#include<unistd.h>#include<stdlib.h>#include<string.h>#define MAXTOKENLEN 64#define MAXTOKENS 100#define MAXDECLLEN 200typedef int state;typedef char condition;#define STATENUM 5#define STATE0 0#define STATE1 1#define STATE2 2#define STATE3 3#define STATE4 4#define STATETRAP 6#define CONDITIONS 5 struct token{ char type; char string[MAXTOKENLEN];};struct decl{ char string[MAXDECLLEN]; int current_location;};struct decl mydecl;int top=-1;struct token this;//the current tokenstruct token stack[MAXTOKENS];//the tokens before the first identifer#define IDENTIFIER 'I'#define QUALIFIER 'Q'#define TYPE 'T'#define pop stack[top--]#define push(s) stack[++top]=s//#define debug char classify_string(const char *string);/* * init the value of mydecl*/int init_parse_decl(){ if( fgets(mydecl.string,MAXDECLLEN,stdin) == NULL){ perror("can not read from the stdin"); } if(strlen(mydecl.string)==MAXDECLLEN-1) printf("you input is too long, you may get an error!"); mydecl.current_location=0; #ifdef debug printf("init:we get last char of mydecl:%c,%d\n",mydecl.string[strlen(mydecl.string)],mydecl.string[strlen(mydecl.string)]); #endif}/* get a token from the current string,value the "this" and move the pointernotice: we may get a '\0' at the end of the string */int gettoken(){ char *ch_pointer=this.string; //leave out the blank while(mydecl.string[mydecl.current_location]==' '){ mydecl.current_location++; } *ch_pointer=mydecl.string[mydecl.current_location]; if(isalnum(*ch_pointer)){ while(isalnum(*ch_pointer)){ mydecl.current_location++; ch_pointer++; *ch_pointer=mydecl.string[mydecl.current_location];}*ch_pointer='\0';this.type=classify_string(this.string );//indentifier,qualifier,type#ifdef debug printf("we get token:%s ",this.string );#endifreturn 1; } //else:(, ), [, ] // printf("current location is:%d\n",mydecl.current_location); // printf("(%d)",mydecl.string[mydecl.current_location]); switch(*ch_pointer){ case '*': case '(': case ')': case '[': case ']': this.type=*ch_pointer; this.string[1]='\0'; mydecl.current_location++; #ifdef debug printf("we get token:%s ",this.string ); #endif break; case '\n': case '\0':this.type='\0'; // printf("we come to the end\n"); strcpy(this.string,"then end"); return 0; default : printf("we can not read this indentifier %d\n",*ch_pointer); parse_error(); } return 1;}char classify_string(const char *string){ if(isspace(*string)) return '\0'; if(!strcmp(string,"const")) return QUALIFIER; if(!strcmp(string,"volatile")) return QUALIFIER; if(!strcmp(string,"void")) return TYPE; if(!strcmp(string,"char")) return TYPE; if(!strcmp(string,"signed")) return TYPE; if(!strcmp(string,"unsigned")) return TYPE; if(!strcmp(string,"short")) return TYPE; if(!strcmp(string,"int")) return TYPE; if(!strcmp(string,"long")) return TYPE; if(!strcmp(string,"float")) return TYPE; if(!strcmp(string,"struct")) return TYPE; if(!strcmp(string,"union")) return TYPE; if(!strcmp(string,"enum")) return TYPE; return IDENTIFIER ;}/*follow the dec string until the first identifier,push token to a stack*/int read_to_first_identifier(){ if(gettoken()==0){ printf("readtofirst:missing the identifier,please put in your string...\n"); return 0; } //the token is a struct with member type and string while(this.type!=IDENTIFIER) { push(this);//a stack with element of token if(gettoken()==0){ printf("readtofirst:missing the identifier\n"); return 0; } } printf("%s is ", this.string); return 1;}/*deal with the next token in diff ways according to the token typewe just go to right1) "(":deal in a function way,deal_with_declaration,2) "[":deal in a array way,deal_with_declaration,3) ")":deal with a pointer way,deal_with_declaration4) NULL:move_left,*/int deal_with_declaration(){ if(gettoken()==0){ move_left(); return 1; } // switch(this.type){ case '(': deal_with_function();break; case ')': deal_with_pointer();break; case '[': deal_with_array();break; default : printf("there should not be a %c after the identifier\n",this.type );parse_error(); } deal_with_declaration();}/*the current token is [,the content in the '[]' may be digtal,al,and other*/int deal_with_array(){ printf("array of "); while(this.type!=']'){ gettoken();//we may get an "]" or digital if(isdigit(this.string[0])){ printf("%d ", atoi(this.string)); //the next token must be ] gettoken(); if(this.type!=']'){printf("the array miss the ']'before %s",this.string);parse_error(); } } } return 0;}int parse_error(){printf("press any key to exit\n");getchar();exit(0);}/*the current token is ')' */int deal_with_pointer(){ while(stack[top].type=='*'){ printf("pointer pointing to \n"); pop; } if(stack[top].type!='('){ printf(" missing an '(' after %s\n",stack[top].string ); parse_error(); } else{ pop; return 1; }}/*the current token is '('*/int deal_with_function(){ while(this.type!=')'){ gettoken(); if(this.type=='\0'){ printf(" missing an ')' before %s",this.string); parse_error(); } } printf(" function returning "); return 1;}int move_left(){ while(top>=0){ switch (stack[top].type){ case '*': printf(" pointer pointing to ");break; case QUALIFIER: printf(" %s ",stack[top].string );break; case TYPE: printf(" %s ",stack[top].string );break; default :printf("there is someting wrong about %s",stack[top].string);parse_error();break; } top--; }}typedef int (* actiontype)( );typedef struct{ state next; actiontype action;}trasition, *ptrasition;typedef struct{ state current; int statenum; int conditionnum; trasition trasitions[STATENUM][CONDITIONS];} StateMachine, * pStateMachine;int initMachine(pStateMachine mymachine){ trasition array={ STATE1,deal_with_array }, fun={ STATE2,deal_with_function }, pointer={ STATE3,deal_with_pointer }, left={ STATE4,move_left }, acerr={ STATE4,parse_error }, end={ STATE4,parse_error/*it will no be execed*/ }; trasition mytrasitions[STATENUM][CONDITIONS]={ /*[, (, ), \0,other*/ /*s0*/ array,fun,pointer,left,acerr, /*s1*/ array,fun,pointer,left,acerr, /*s2*/ array,fun,pointer,left,acerr, /*s3*/ array,fun,pointer,left,acerr, /*s4*/ end, end, end ,end , end }; mymachine->statenum=STATENUM; mymachine->conditionnum=CONDITIONS; mymachine->current=STATE0; int i,j; for (i = 0; i < STATENUM ; ++i) { for (j = 0; j < CONDITIONS; ++j) { mymachine->trasitions[i][j]=mytrasitions[i][j]; } }} state step(pStateMachine machine, condition mycondition){ int index; switch (mycondition){ case '[': index=0;break; case '(': index=1;break; case ')': index=2;break; case '\0': index=3;break; default: index=4; } trasition t = machine->trasitions[machine->current][index]; (*(t.action))( ); if(machine->current==1&&index==1){ printf("\n\n\nerror:the element of array should not be function\n"); parse_error(); } if(machine->current==2&&index==1){ printf("\n\n\nerror:the returning type of a funciton should not be function\n"); parse_error(); } if(machine->current==2&&index==0){ printf("\n\n\nerror:the returning type of a funciton should not be array\n"); parse_error(); } machine->current = t.next; //printf("the current state is %d\n",t->next ); return machine->current; }int main(int argc, char *argv[]){ StateMachine mymachine; init_parse_decl(); read_to_first_identifier(); initMachine(&mymachine); char mycon; while(mymachine.current!=STATE4){ gettoken(); mycon=this.type; step(&mymachine,mycon); } printf("\n"); return 0;}