编号: CSM49
| 实习 | 一 | 二 | 三 | 四 | 五 | 六 | 七 | 八 | 九 | 十 | 总评 | 教师签名 |
| 成绩 |
计算机学院
《编译原理》课程
词法分析
实习报告
编 号: 班序号
实习题目: 词法分析程序
专业(班): 计科四班
学生学号:
学生姓名:
任课教师:
2022 年 4 月 27 日
目录
$G = (V_T, V_N, P, S)$
其中
$V_T$ 为终结符号,
$V_T$ = {
A,B,C,D,E,F,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z,
a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,
1,2,3,4,5,6,7,8,9,0,
!,#,$,%,&,*,(,),-,_,+,=,[,],:,<,>, , ,;,.,/,?,|,^,\,{,}
}
$V_N$ 为非终结符号,$V_N$ = {}
$P$ 为产生式集合
![]()
正在上传…重新上传取消正在上传…重新上传取消
$S$ 为开始符号
| 单词符号 | 编码 |
| private | 30 |
| protected | 31 |
| this | 32 |
| false | 33 |
| true | 34 |
| try | 35 |
| catch | 36 |
| throw | 37 |
| goto | 38 |
| using | 39 |
| new | 40 |
| namespace | 41 |
| + | 42 |
| ++ | 43 |
| - | 44 |
| -- | 45 |
| * | 46 |
| / | 47 |
| % | 48 |
| < | 49 |
| <= | 50 |
| << | 51 |
| > | 52 |
| >= | 53 |
| >> | 54 |
| = | 55 |
| == | 56 |
| ! | 57 |
| != | 58 |
| 单词符号 | 编码 |
| & | 59 |
| && | 60 |
| | | 61 |
| || | 62 |
| ^ | 63 |
| , | 64 |
| ; | 65 |
| . | 66 |
| { | 67 |
| } | 68 |
| [ | 69 |
| ] | 70 |
| ( | 71 |
| ) | 72 |
| # | 73 |
| \ | 74 |
| ? | 75 |
| 标识符表 | 76 |
| 整数常量表 | 77 |
| 浮点数常量表 | 78 |
单词编码表
| 单词符号 | 编码 |
| main | 1 |
| if | 2 |
| else | 3 |
| while | 4 |
| do | 5 |
| for | 6 |
| int | 7 |
| double | 8 |
| float | 9 |
| char | 10 |
| long | 11 |
| short | 12 |
| enum | 13 |
| static | 14 |
| bool | 15 |
| void | 16 |
| switch | 17 |
| case | 18 |
| break | 19 |
| continue | 20 |
| signed | 21 |
| unsigned | 22 |
| return | 23 |
| default | 24 |
| struct | 25 |
| include | 26 |
| define | 27 |
| class | 28 |
| public | 29 |
第三部分 状态转换图
![]()
正在上传…重新上传取消正在上传…重新上传取消
#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
// Number of entries in the reserved-word table keyWord.
#define NUM 15
using namespace std;
// Input stream being scanned and output stream receiving the results;
// both are opened in main().
FILE* f_in;
FILE* f_out;
// File names typed by the user for f_in and f_out respectively.
char f1_name[100];
char f2_name[100];
// Reserved-word table. main() sorts it with sort() before scanning so that
// binary_S() can search it.
char keyWord[NUM][20] = { "default","if", "else","for","while","do","int","read","write","float","switch","case","break","function","call" }; // reserved words
char token[15]; // text of the word most recently scanned by compile_word()
char token_num; // index of the next free slot in token (current word length)
char ch; // current look-ahead character read from f_in
int row; // number of newlines counted so far; messages report row + 1
// Lower-cased copy of token, filled by compile_word() when flag1 is set.
char tmp[15];
int flag1; // set to 1 when a newline was skipped before the current word
int flag2; // set to 1 when the current word contains an upper-case letter
// Set to 1 when letters follow the digits of a numeric literal.
int flag3;
// Not referenced by any of the functions visible in this file.
int flag = 0;
// Alias of keyWord; not referenced by any of the functions visible in this file.
char(*keyword)[20] = keyWord;
// Not referenced by the visible functions (binary_S has a parameter of the same name).
char* string;
// Forward declarations of the scanner routines defined below.
void compile();
int compile_word();
void sort(char(*a)[20]);
int binary_S(char(*a)[20], char* string);
/*
 * Program entry point.
 *
 * Sorts the reserved-word table (binary_S needs it ordered), asks the user
 * for the input and output file names, runs the scanner over the input and
 * closes both files.
 *
 * Returns EXIT_SUCCESS when the scan ran, EXIT_FAILURE when a file name
 * could not be read or a file could not be opened.
 */
int main() {
	int status = EXIT_FAILURE;

	sort(keyWord);

	printf("请输入要编译的文件名字:");
	// The width 99 matches the 100-byte name buffers (99 characters + '\0').
	if (scanf("%99s", f1_name) != 1) {
		fprintf(stderr, "ERROR: could not read the input file name\n");
	}
	else if ((f_in = fopen(f1_name, "r")) == NULL) {
		// Without this check compile() would call getc() on a null stream.
		perror(f1_name);
	}
	else {
		printf("请输入要将编译结果存入的文件名字:");
		if (scanf("%99s", f2_name) != 1) {
			fprintf(stderr, "ERROR: could not read the output file name\n");
		}
		else if ((f_out = fopen(f2_name, "w")) == NULL) {
			perror(f2_name);
		}
		else {
			compile();
			fclose(f_out);
			status = EXIT_SUCCESS;
		}
		fclose(f_in);
	}

	// Keep the console window open on every exit path, as the original did.
	system("pause");
	return status;
}
// Orders the NUM rows of the reserved-word table ascending by strcmp
// (i.e. by character code). Exchange sort: every later row that compares
// smaller than the row at the current position is swapped into it.
void sort(char(*a)[20]) {
	char held[20];
	int slot = 0;
	while (slot < NUM) {
		int probe = slot + 1;
		while (probe < NUM) {
			if (strcmp(a[probe], a[slot]) < 0) {
				// Swap the two complete 20-byte rows.
				memcpy(held, a[slot], sizeof held);
				memcpy(a[slot], a[probe], sizeof held);
				memcpy(a[probe], held, sizeof held);
			}
			++probe;
		}
		++slot;
	}
}
// Binary search for `string` among the NUM rows of `a`, which must already
// be in ascending strcmp order.
// Returns the index of the matching row, or -1 when there is no match.
int binary_S(char(*a)[20], char* string) {
	int first = 0;
	int last = NUM - 1;
	while (first <= last) {
		const int probe = (first + last) / 2;
		const int order = strcmp(string, a[probe]);
		if (order == 0) {
			return probe;
		}
		if (order < 0) {
			last = probe - 1;   // continue in the lower half
		}
		else {
			first = probe + 1;  // continue in the upper half
		}
	}
	return -1;
}
void bqd() {
//状态2
switch (ch)
{
case '*': ch = getc(f_in); //转到状态3
//状态3
s3:while (ch != '*') {
ch = getc(f_in);
if (ch == EOF) {
printf("ERROR: the error place is in the %d row.注释错误\n", row + 1);
return;
}
} //状态3循环
switch (ch)
{
case '*':ch = getc(f_in); //转到状态4
//状态4
while (ch == '*') ch = getc(f_in); //状态4循环
switch (ch)
{
case '/':ch = getc(f_in);
printf("注释正确\n");
fprintf(f_out, "注释正确\n");
return; //状态5结束
default:goto s3; //转到状态3
}
default: goto end;
}
default:
printf("单分符\t%s\n", token); //状态6
goto end;
}
end:return;
}
// Returns non-zero when c is an ASCII letter.
static int cw_is_letter(int c) {
	return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Returns non-zero when c is an ASCII decimal digit.
static int cw_is_digit(int c) {
	return c >= '0' && c <= '9';
}

/*
 * Scans one word starting at the look-ahead character `ch`, stores its text
 * in `token` and leaves `ch` on the first character after the word.
 *
 * Return value:
 *    1  reserved word (found in keyWord)
 *    2  identifier
 *    3  numeric constant (digits and '.')
 *    4  single-character delimiter
 *    5  two-character delimiter (++, >=, <=, !=, ==)
 *   -1  end of input
 *   -2  character that starts no known word
 *   -3  first word on a line that equals a reserved word only after
 *       lower-casing; the lower-cased text is left in `tmp`
 *   -4  numeric literal immediately followed by letters
 *
 * Words longer than the token buffer are truncated to fit; the excess
 * characters are still consumed from the input.
 */
int compile_word() {
	// Longest word that fits in both buffers with room for the terminator.
	const int maxLen = (int)(sizeof(token) < sizeof(tmp) ? sizeof(token) : sizeof(tmp)) - 1;

	// Reset the word buffers and per-word flags. The zero fill also acts as
	// the terminator for the numeric and delimiter branches below.
	memset(token, 0, sizeof(token));
	memset(tmp, 0, sizeof(tmp));
	token_num = 0;
	flag1 = 0;
	flag2 = 0;
	flag3 = 0;

	// Skip white space (tabs and carriage returns included), counting lines.
	while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
		if (ch == '\n') {
			row++;
			flag1 = 1;
		}
		ch = getc(f_in);
	}

	if (cw_is_letter(ch)) { // identifier or reserved word
		while (cw_is_letter(ch) || cw_is_digit(ch)) {
			if (token_num < maxLen) { // never write past the buffer
				token[token_num++] = ch;
			}
			ch = getc(f_in);
		}
		// Only for the first word after a newline: build a lower-cased copy
		// so a reserved word written with capitals can be diagnosed.
		if (flag1 == 1) {
			for (int j = 0; j < token_num; j++) {
				tmp[j] = token[j];
				if (token[j] >= 'A' && token[j] <= 'Z') {
					tmp[j] = tmp[j] + 32; // upper case to lower case
					flag2 = 1;
				}
			}
		}
		token[token_num++] = '\0';

		// One lookup per candidate is enough; the result cannot change
		// between repetitions.
		if (flag1 == 1 && flag2 == 1 && binary_S(keyWord, tmp) != -1) {
			return -3; // reserved word spelled with the wrong case
		}
		if (binary_S(keyWord, token) != -1) {
			return 1; // reserved word
		}
		return 2; // identifier
	}

	if (cw_is_digit(ch)) { // numeric constant, integer or with '.'
		while (cw_is_digit(ch) || ch == '.') {
			if (token_num < maxLen) {
				token[token_num++] = ch;
			}
			ch = getc(f_in);
			// Letters glued to the number make it illegal; they are consumed
			// but not stored.
			while (cw_is_letter(ch)) {
				ch = getc(f_in);
				flag3 = 1;
			}
		}
		return (flag3 != 1) ? 3 : -4;
	}

	// Delimiters: 4 = single character, 5 = two characters.
	token[token_num++] = ch;
	char second = '\0'; // character that would complete a two-character delimiter
	switch (ch)
	{
	case EOF:
		return -1; // end of input; nothing further is read
	case '*': case '-': case '(': case ')': case '{': case '}':
	case ',': case ';': case '"': case '/': case ':':
		ch = getc(f_in);
		return 4;
	case '+':
		second = '+';
		break;
	case '>': case '<': case '!': case '=':
		second = '=';
		break;
	default: // no word starts with this character
		ch = getc(f_in);
		return -2;
	}

	// Look one character ahead to decide between the one- and two-character form.
	ch = getc(f_in);
	if (ch == second) {
		token[token_num++] = ch;
		ch = getc(f_in);
		return 5;
	}
	return 4;
}
int INT;
void compile() {
int state; //记录编译状态
int error[100]; //记录错误行数
printf("编译结果:\n");
printf("类别值\t自身值\n");
//读取文件第一个字符
ch = getc(f_in);
while (1) {
if (ch != '/') {
state = compile_word();
if (state == -1) {
break;
}
switch (state)
{
case 1:
printf("%s\t%s\n", token, token);
fprintf(f_out, "%s\t%s\n", token, token);
break;
case 2: {
printf("ID\t%s\n", token);
fprintf(f_out, "ID\t%s\n", token);}
break;
case 3:
printf("NUM\t%s\n", token);
fprintf(f_out,"NUM\t%s\n", token);
break;
case 4:
printf("%s\t%s\n", token, token);
fprintf(f_out, "%s\t%s\n", token, token);
break;
case 5:
printf("%s\t%s\n", token, token);
fprintf(f_out, "%s\t%s\n", token, token);
break;
case -2:
printf("ERROR: the error place is in the %d row. You have entered illegal characters\n", row + 1);
fprintf(f_out, "ERROR: the error place is in the %d row. You have entered illegal characters\n", row + 1);
break;
case -3:
printf("ERROR: the error place is in the %d row. You should enter lowercase (%s)\n", row + 1, tmp);
fprintf(f_out, "ERROR: the error place is in the %d row. You should enter lowercase (%s)\n", row + 1, tmp);
break;
case -4:
printf("ERROR: the error place is in the %d row. You cannot start a word with a number\n", row + 1);
fprintf(f_out, "ERROR: the error place is in the %d row. You cannot start a word with a number\n", row + 1);
break;
default:
break;
}
}
else
{
ch = getc(f_in);
bqd();
}
}
}
测试数据:
![]()
正在上传…重新上传取消正在上传…重新上传取消
实验结果:
![]()
正在上传…重新上传取消正在上传…重新上传取消


被折叠的 条评论
为什么被折叠?



