1200字范文,内容丰富有趣,写作的好帮手!
1200字范文 > C-语言词法分析器与语法分析器(一)

C-语言词法分析器与语法分析器(一)

时间:2023-12-27 01:21:55

相关推荐

C-语言词法分析器与语法分析器(一)

说明:

为实践《编译原理》中的相关知识,认真完成了课程设计,实现了C-语言的词法分析器与语法分析器

C-语言是C语言的一个子集,语法包括:

整型变量与函数的声明

if else 分支语句

while 循环语句

本篇介绍词法分析器的实现

流程:

写出该语言的词法规则与正则表达式 → 构造DFA → 代码实现

该语言词法规则与正则表达式

1.保留字

int void if else return while

正则表达式即为原串,在代码中当作标识符匹配,匹配完后再与保留字比较

2.标识符

letter= [a-z | A-Z]

digit= [0-9]

正则表达式 letter(letter| digit)*

3.数字

digit= [1-9]

D = 0 | digit

整型:

正则表达式:digitD*

浮点型:

正则表达式:digit D* (. D*)? (e -? digit D*)?

4.符号

+ - * / % > >= < <= [ ] ( ) { } != == , ;

正则表达式即符号本身

5.注释

C = 所有字符

//型:

正则表达式:// C*

/**/型:

正则表达式:/* C* */

构造DFA

标识符:

(不考虑带 _ 的标识符,画多了)

整型:

浮点数:

//型注释:

/**/型注释:

代码实现

用switch case实现DFA

伪代码为:

switch(state)
{
case 1:
    c = getnextchar();
    state = goto(state, c); //根据当前状态与字符判断跳转到哪一个状态
    token.push_back(c);     //将字符保存进token
    break;
case 2:
    c = getnextchar();
    state = goto(state, c); //根据当前状态与字符判断跳转到哪一个状态
    token.push_back(c);     //将字符保存进token
    break;
case 接受状态:
    print(token);           //打印保存的token
    break;
……
}

在状态转移的过程中,需要向前额外看一位,判断是否接受当前的token

如在匹配int a = 123;的过程中,匹配到a时,向前看一位是=,于是将a保存为一个token,同时指针退一位,下次从=开始匹配

详细代码:

//Scanner.h//作者:IuSpet//作用:将c-源代码转化为token输出#ifndef Scanner_h#define Scanner_h#include"utlib.h"class Scanner{private://char buffer[4096];//读入源程序的缓冲区std::string buffer;int pos;//缓冲区位置int syn;//token类别int state;//DFA中的状态std::string sourcename;int filepos;std::ifstream infile;//int tsss;const int BUFFERLENGTH = 4096;public:Scanner(const char* s){//if (source = fopen(s, "r"));//else exit(1);sourcename = s;infile.open(s);pos = 0;syn = -1;state = 0;filepos = 0;//fgets(buffer, BUFFERLENGTH, source);}void GetToken();//在DFA上转移,识别tokenbool IsNum(const char c);bool IsLetter(const char c);char GetNext();//获取下一个字符void Back();//向前看完后回溯~Scanner(){infile.close();}};#endif // !Scaner_h#pragma once

//Scanner.cpp//作者:IuSpet//作用:将c-源代码转化为token输出#include "scanner.h"void Scanner::GetToken(){char ch;constexpr int TOKENLENGTH = 256;char token[TOKENLENGTH];memset(token, 0, TOKENLENGTH);int tokenpos = 0;std::ofstream outfile("D://cminus//token.txt");while ((ch = GetNext()) != EOF){//todo: 标识符,关键字,整型,浮点数运算符,注释,界符,字符串的dfawhile (state != 100)switch (state){case 0://开始状态if (ch == '+') state = 1;else if (ch == '-') state = 2;else if (ch == '*') state = 3;else if (ch == '/') state = 4;else if (ch == '<') state = 5;else if (ch == '>') state = 6;else if (ch == '=') state = 7;else if (ch == ';') state = 8;else if (ch == '!') state = 9;else if (ch == '[') state = 10;else if (ch == ']') state = 11;else if (ch == '(') state = 12;else if (ch == ')') state = 13;else if (ch == '{') state = 14;else if (ch == '}') state = 15;else if (ch == '"') state = 16;else if (ch == ',')state = 17;else if (IsLetter(ch)) state = 18;else if (IsNum(ch)) state = 19;else if (ch == ' ' || ch == '\t' || ch == '\n') state = 100;else state = 99;//异常break;case 1://匹配到 +token[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 19;else{Back();syn = 8;state = 100;}break;case 2://匹配到 -token[tokenpos++] = ch;syn = 9;state = 100;break;case 3://匹配到 *token[tokenpos++] = ch;syn = 10;state = 100;break;case 4://匹配到 /token[tokenpos++] = ch;ch = GetNext();if (ch == '/') state = 20;else if (ch == '*') state = 21;else{Back();state = 100;syn = 11;}break;case 20://匹配到 //while ((ch = GetNext()) != '\n');syn = 31;state = 100;break;case 21:// 匹配到 /*ch = GetNext();if (ch == '*') state = 22;else state = 21;break;case 22://匹配到 /**ch = GetNext();if (ch == '*') state = 22;else if (ch == '/')state = 23;else state = 21;break;case 23://匹配到 /**/state = 100;syn = 32;break;case 5://匹配到 <token[tokenpos++] = ch;ch = GetNext();if (ch == '=') state = 24;else{Back();;state = 100;syn = 12;}break;case 24://匹配到 <=token[tokenpos++] = ch;state = 100;syn = 23;break;case 6://匹配到 >token[tokenpos++] = ch;ch = GetNext();if (ch == '=') state = 
25;else{Back();state = 100;syn = 13;}break;case 25://匹配到 >=token[tokenpos++] = ch;state = 100;syn = 24;break;case 7://匹配到 =token[tokenpos++] = ch;ch = GetNext();if (ch == '=') state = 26;else{Back();state = 100;syn = 14;}break;case 26://匹配到 ==token[tokenpos++] = ch;state = 100;syn = 25;break;case 8://匹配到 ;token[tokenpos++] = ch;state = 100;syn = 15;break;case 9://匹配到 !token[tokenpos++] = ch;ch = GetNext();if (ch == '=') state = 27;else state = 99;break;case 27://匹配到 !=token[tokenpos++] = ch;state = 100;syn = 26;break;case 10://匹配到 [token[tokenpos++] = ch;state = 100;syn = 17;break;case 11://匹配到 ]token[tokenpos++] = ch;state = 100;syn = 18;break;case 12://匹配到 (token[tokenpos++] = ch;state = 100;syn = 19;break;case 13://匹配到 )token[tokenpos++] = ch;state = 100;syn = 20;break;case 14://匹配到 {token[tokenpos++] = ch;state = 100;syn = 21;break;case 15://匹配到 }token[tokenpos++] = ch;state = 100;syn = 22;break;case 16://匹配到 "……token[tokenpos++] = ch;ch = GetNext();if (ch == '"') state = 28;else state = 16;break;case 28://匹配到 "……"token[tokenpos++] = ch;state = 100;syn = 30;break;case 17://匹配到 ,token[tokenpos++] = ch;state = 100;syn = 16;break;case 18://匹配到字母token[tokenpos++] = ch;ch = GetNext();if (IsLetter(ch) || IsNum(ch)) state = 18;//向前看一位还是数字或字母else{Back();state = 29;//向前看一位不属于标识符}break;case 29://判断匹配到的标识符是不是关键字if (strcmp(token, "if") == 0){state = 100;syn = 0;}else if (strcmp(token, "else") == 0){state = 100;syn = 1;}else if (strcmp(token, "int") == 0){state = 100;syn = 2;}else if (strcmp(token, "double") == 0){state = 100;syn = 3;}else if (strcmp(token, "return") == 0){state = 100;syn = 4;}else if (strcmp(token, "void") == 0){state = 100;syn = 5;}else if (strcmp(token, "while") == 0){state = 100;syn = 6;}else if (strcmp(token, "char") == 0){state = 100;syn = 7;}else{state = 100;syn = 27;}break;case 19://匹配到的是数字token[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 19;//下一位还是数字else if (ch == '.') state = 30;//下一位是.else{Back();state = 100;syn = 28;}break;case 
30://匹配到 digit D* .token[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 30;else if (ch == 'e') state = 31;else{Back();state = 100;syn = 29;}break;case 31://匹配到 digit D* . D* etoken[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 32;else if (ch == '-')state = 33;else{Back();state = 99;}break;case 32://匹配到 digit D* . D* etoken[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 32;else{Back();state = 100;syn = 29;}break;case 33://匹配到 digit D* . D* e -token[tokenpos++] = ch;ch = GetNext();if (IsNum(ch)) state = 32;else{Back();state = 99;}break;case 99://匹配中出错std::cout << std::endl;std::cout << "error" << std::endl;std::cout << (int)ch << " " << ch << std::endl;ch = GetNext();while (ch != ' '&&ch != '\t'&&ch != '\n'&&ch != ';') ch = GetNext();Back();state = 100;syn = -1;}if (state == 100 && syn != -1)//接受状态{switch (syn){case 0:case 1:case 2:case 3:case 4:case 5:case 6:case 7:outfile << "<" << "RESERVED WORD," << token << ">" << std::endl;//outfile << "<" << token << ">" << std::endl;break;case 27:outfile << "<" << "ID," << token << ">" << std::endl;break;case 28:outfile << "<" << "NUM," << token << ">" << std::endl;break;case 29:outfile << "<" << "DOUBLE," << token << ">" << std::endl;break;case 30:outfile << "<" << "STRING," << token << ">" << std::endl;break;case 31:case 32:break;default:outfile << "<" << "SYMBOL," << token << ">" << std::endl;//outfile << "<" << token << ">" << std::endl;}memset(token, 0, TOKENLENGTH);tokenpos = 0;state = 0;syn = -1;}if (state = 100) state = 0;}outfile.close();}bool Scanner::IsNum(const char c){return (c >= '0' && c <= '9');}bool Scanner::IsLetter(const char c){return c >= 'a'&&c <= 'z' || c >= 'A'&&c <= 'Z';}char Scanner::GetNext(){if (pos < buffer.length()){return buffer[pos++];}else{if (std::getline(infile, buffer)){buffer.push_back('\n');}else{return EOF;}pos = 0;return buffer[pos++];}}void Scanner::Back(){pos -= 1;}

测试

测试源码:

int main()
{
    //annotation1
    int a = 123;
    double b = 12.2e-2;
    char str = "hello world";
    if(a != 123)
    {
        return 1;
    }
    /****annotation2****/
    else
    {
        b = a * b + a / b;
    }
    return 0;
}

测试结果:

<RESERVED WORD,int>
<ID,main>
<SYMBOL,(>
<SYMBOL,)>
<SYMBOL,{>
<RESERVED WORD,int>
<ID,a>
<SYMBOL,=>
<NUM,123>
<SYMBOL,;>
<RESERVED WORD,double>
<ID,b>
<SYMBOL,=>
<DOUBLE,12.2e-2>
<SYMBOL,;>
<RESERVED WORD,char>
<ID,str>
<SYMBOL,=>
<STRING,"hello world">
<SYMBOL,;>
<RESERVED WORD,if>
<SYMBOL,(>
<ID,a>
<SYMBOL,!=>
<NUM,123>
<SYMBOL,)>
<SYMBOL,{>
<RESERVED WORD,return>
<NUM,1>
<SYMBOL,;>
<SYMBOL,}>

全部源码

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。