编译原理课程：词法分析实习报告与算法实现-CSDN博客

编号： CSM49

实习

一

二

三

四

五

六

七

八

九

十

总评

教师签名

成绩

计算机学院

《编译原理》课程

词法分析

实习报告

编号：班序号

实习题目：词法分析程序

专业（班）：计科四班

学生学号：

学生姓名：

任课教师：

２０２２年 4 月 27 日

语言形式化描述

$G = (V_T, V_N, P, S)$

其中

$V_T$ 为终结符号，

$V_T$ = {

A，B，C，D，E，F，G，H，I，J，K，L，M，N，O，P，Q，R，S，T，U，V，W，X，Y，Z,

a，b，c，d，e，f，g，h，i，j，k，l，m，n，o，p，q，r，s，t，u，v，w，x，y，z,

1,2,3,4,5,6,7,8,9,0,

!,#,$,%,&,*,(,),-,_,+,=,[,],:,<,>, , ,;,.,/,?,|,^,\,{,}

}

$V_N$ 为非终结符号，$V_N$ = {}

$P$ 为产生式集合

正在上传…重新上传取消正在上传…重新上传取消

$S$ 为开始符号

单词编码表

单词符号	编码
private	30
protected	31
this	32
false	33
true	34
try	35
catch	36
throw	37
goto	38
using	39
new	40
namespace	41
+	42
++	43
-	44
--	45
*	46
/	47
%	48
<	49
<=	50
<<	51
>	52
>=	53
>>	54
=	55
==	56
!	57
!=	58

单词符号	编码
&	59
&&	60
|	61
||	62
^	63
,	64
;	65
.	66
{	67
}	68
[	69
]	70
(	71
)	72
#	73
\	74
?	75
标识符表	76
整数常量表	77
浮点数常量表	78

单词编码表

单词符号	编码
main	1
if	2
else	3
while	4
do	5
for	6
int	7
double	8
float	9
char	10
long	11
short	12
enum	13
static	14
bool	15
void	16
switch	17
case	18
break	19
continue	20
signed	21
unsigned	22
return	23
default	24
struct	25
include	26
define	27
class	28
public	29

第三部分状态转换图

正在上传…重新上传取消正在上传…重新上传取消

词法分析算法

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define NUM 15  // number of entries in the reserved-word table keyWord
using namespace std;

// Scanner state shared by every function in this file.
FILE* f_in;         // source file being scanned
FILE* f_out;        // file that receives a copy of the scan results
char f1_name[100];  // input file name typed by the user
char f2_name[100];  // output file name typed by the user
// Reserved words. main() sorts this table before scanning because binary_S
// performs a binary search over it.
char keyWord[NUM][20] = { "default","if", "else","for","while","do","int","read","write","float","switch","case","break","function","call" };
char token[15];  // text of the lexeme currently being recognised
char token_num;  // number of characters stored in token
// Current look-ahead character. It is an int, not a char, because it holds the
// result of getc(): only an int can represent every byte value AND the
// distinct EOF marker. With a plain char, a 0xFF byte compares equal to EOF
// where char is signed, and EOF is never detected where char is unsigned.
int ch;
int row;       // number of '\n' characters consumed so far (reported as row + 1)
char tmp[15];  // lower-cased copy of token, used for the case check
int flag1;     // set when a newline was skipped right before the current lexeme
int flag2;     // set when the current word contains an upper-case letter
int flag3;     // set when letters directly follow the digits of a number
int flag = 0;  // not referenced in the visible code
char(*keyword)[20] = keyWord;  // alias of keyWord; not referenced in the visible code
char* string;                  // not referenced in the visible code

void compile();
int compile_word();
void sort(char(*a)[20]);
int binary_S(char(*a)[20], char* string);

// Entry point: asks for the input and output file names, opens both files,
// runs the scanner over the input and closes the files again.
// Returns 0 on success and 1 when a file name cannot be read or a file
// cannot be opened.
int main() {
    // binary_S does a binary search, so the table must be in strcmp order.
    sort(keyWord);

    printf("请输入要编译的文件名字:");
    // The width limit keeps the name inside the 100-byte buffer.
    if (scanf("%99s", f1_name) != 1) {
        printf("ERROR: failed to read the input file name\n");
        return 1;
    }
    f_in = fopen(f1_name, "r");
    if (f_in == NULL) {
        printf("ERROR: cannot open input file %s\n", f1_name);
        return 1;
    }

    printf("请输入要将编译结果存入的文件名字:");
    if (scanf("%99s", f2_name) != 1) {
        printf("ERROR: failed to read the output file name\n");
        fclose(f_in);
        return 1;
    }
    f_out = fopen(f2_name, "w");
    if (f_out == NULL) {
        printf("ERROR: cannot open output file %s\n", f2_name);
        fclose(f_in);
        return 1;
    }

    compile();

    fclose(f_in);
    fclose(f_out);
    system("pause");
    return 0;
}

// Sorts the NUM reserved words of `a` in place into ascending strcmp order.
// Each position is compared with every later position; whenever the later
// entry orders first, the two 20-byte rows are exchanged.
void sort(char(*a)[20]) {
    char held[20];
    for (int left = 0; left < NUM; ++left) {
        for (int right = left + 1; right < NUM; ++right) {
            if (strcmp(a[right], a[left]) >= 0) {
                continue;  // already in order
            }
            memcpy(held, a[left], sizeof(held));
            memcpy(a[left], a[right], sizeof(held));
            memcpy(a[right], held, sizeof(held));
        }
    }
}
// Binary search for `string` among the NUM rows of `a`, which must already be
// in ascending strcmp order.
// Returns the index of the matching row, or -1 when there is no match.
int binary_S(char(*a)[20], char* string) {
    int lo = 0;
    int hi = NUM - 1;
    while (lo <= hi) {
        const int mid = (lo + hi) / 2;
        const int order = strcmp(string, a[mid]);
        if (order == 0) {
            return mid;
        }
        if (order < 0) {
            hi = mid - 1;
        } else {
            lo = mid + 1;
        }
    }
    return -1;
}

void bqd() {
//状态2
switch (ch)
{
case '*': ch = getc(f_in); //转到状态3
//状态3
s3:while (ch != '*') {
ch = getc(f_in);
if (ch == EOF) {
printf("ERROR: the error place is in the %d row.注释错误\n", row + 1);
return;
}
}                                     //状态3循环
switch (ch)
{
case '*':ch = getc(f_in); //转到状态4
//状态4
while (ch == '*') ch = getc(f_in);  //状态4循环
switch (ch)
{
case '/':ch = getc(f_in);
printf("注释正确\n");
fprintf(f_out, "注释正确\n");
return; //状态5结束
default:goto s3; //转到状态3
}
default: goto end;
}
default:
printf("单分符\t%s\n", token); //状态6
goto end;
}
end:return;
}
int compile_word() {
//将识别的单词数组初始化
for (int i = 0;i < 15;i++) {
token[i] = NULL;
tmp[i] = NULL;
}
token_num = 0;
flag1 = 0;
flag2 = 0;
flag3 = 0;
//处理空格
while ((ch == ' ') || (ch == '\n')) {
if (ch == '\n') {
row++;
flag1 = 1;
}
ch = getc(f_in);
}
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) { //输入可能是标识符或者保留字
//组成一个单词
while ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9')) {
token[token_num++] = ch;
ch = getc(f_in);
}
if (flag1 == 1) {
for (int j = 0;j < token_num;j++) {
tmp[j] = token[j];
if (token[j] >= 'A' && token[j] <= 'Z') {
tmp[j] = tmp[j] + 32;     //大写转换为小写
flag2 = 1;
}
}
}
token[token_num++] = '\0';
//比对保留字
for (int i = 0;i < NUM;i++) {
if (flag1 == 1 && flag2 == 1) {
if (binary_S(keyWord, tmp)!=-1) {
return -3; //大小写敏感
}
}
if (binary_S(keyWord, token)!=-1) {         //匹配到某个保留字
return 1;
}
}
return 2; //关键字ID
}
else if (ch >= '0' && ch <= '9') { //输入的是常量NUM(整型)
//组成一个单词
while ((ch >= '0' && ch <= '9') || ch == '.') {                                 //扩展为浮点型
token[token_num++] = ch;
ch = getc(f_in);
while((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
ch = getc(f_in);
flag3 = 1;
}
}
if (flag3 != 1) {
return 3;
}                                                                   //常量
else return -4; //非法单词
}
else { //输入为纯单分符
token[token_num++] = ch;
switch (ch)
{
//单分符为4 双分界符为5
case '*':
ch = getc(f_in);
return 4;
case '+':
ch = getc(f_in);
if (ch == '+') {
token[token_num++] = ch;
ch = getc(f_in);
return 5;
}
else {
return 4;
} //可以扩展为++
case '-':
ch = getc(f_in);
return 4; //可以扩展为--
case '(':
ch = getc(f_in);
return 4;
case ')':
ch = getc(f_in);
return 4;
case '{':
ch = getc(f_in);
return 4;
case '}':
ch = getc(f_in);
return 4;
case ',':
ch = getc(f_in);
return 4;
case ';':
ch = getc(f_in);
return 4;
case '"':
ch = getc(f_in);
return 4;
case '/':
ch = getc(f_in);
return 4;
case '>':
ch = getc(f_in);
//读下个字符看看是不是双分符
if (ch == '=') {
token[token_num++] = ch;
ch = getc(f_in);
return 5;
}
else {
return 4;
}
case '<':
ch = getc(f_in);
//读下个字符看看是不是双分符
if (ch == '=') {
token[token_num++] = ch;
ch = getc(f_in);
return 5;
}
else {
return 4;
}
case '!':
ch = getc(f_in);
//读下个字符看看是不是双分符
if (ch == '=') {
token[token_num++] = ch;
ch = getc(f_in);
return 5;
}
else {
return 4;
}
case ':':
ch = getc(f_in);
return 4;
case '=':
ch = getc(f_in);
//读下个字符看看是不是双分符
if (ch == '=') {
token[token_num++] = ch;
ch = getc(f_in);
return 5;
}
else {
return 4;
}
case EOF:return -1;    //文件结尾符号
default: //错误没有匹配
ch = getc(f_in);
return -2;
}
}
}
int INT;  // not referenced in the visible code

// Drives the scan of f_in: prints a header, then repeatedly recognises one
// lexeme and reports it on stdout and in f_out until compile_word() signals
// end of input with -1.
// A '/' found at the look-ahead position is passed to bqd(), which deals with
// block comments and the lone '/' delimiter.
void compile() {
    printf("编译结果:\n");
    printf("类别值\t自身值\n");

    // Prime the look-ahead with the first character of the file.
    ch = getc(f_in);

    for (;;) {
        if (ch == '/') {
            ch = getc(f_in);
            bqd();
            continue;
        }

        const int state = compile_word();
        if (state == -1) {
            return;  // end of input
        }

        switch (state) {
        case 1:  // reserved word
        case 4:  // one-character delimiter
        case 5:  // two-character delimiter
            printf("%s\t%s\n", token, token);
            fprintf(f_out, "%s\t%s\n", token, token);
            break;
        case 2:  // identifier
            printf("ID\t%s\n", token);
            fprintf(f_out, "ID\t%s\n", token);
            break;
        case 3:  // numeric constant
            printf("NUM\t%s\n", token);
            fprintf(f_out,"NUM\t%s\n", token);
            break;
        case -2:  // illegal character
            printf("ERROR: the error place is in the %d row. You have entered illegal characters\n", row + 1);
            fprintf(f_out, "ERROR: the error place is in the %d row. You have entered illegal characters\n", row + 1);
            break;
        case -3:  // reserved word written with upper-case letters
            printf("ERROR: the error place is in the %d row. You should enter lowercase (%s)\n", row + 1, tmp);
            fprintf(f_out, "ERROR: the error place is in the %d row. You should enter lowercase (%s)\n", row + 1, tmp);
            break;
        case -4:  // number immediately followed by letters
            printf("ERROR: the error place is in the %d row. You cannot start a word with a number\n", row + 1);
            fprintf(f_out, "ERROR: the error place is in the %d row. You cannot start a word with a number\n", row + 1);
            break;
        default:
            break;
        }
    }
}