# 简单类C语言词法分析器
# - 1、下载文档前请自行甄别文档内容的完整性,平台不提供额外的编辑、内容补充、找答案等附加服务。
# - 2、"仅部分预览"的文档,不可在线预览部分如存在完整性等问题,可反馈申请退款(可完整预览的文档不适用该条件!)。
# - 3、如文档侵犯您的权益,请联系客服反馈,我们会尽快为您处理(人工客服工作时间:9:00-18:30)。
# -*- coding: utf-8 -*-
"""
guanshzh@
学号姓名-词法分析程序.zip (source code ,test data)
Created on Sat Apr 21 15:54:25 2018
这是一个词法分析器程序,用于分析类C语言的代码
@author: Guqi
"""
#以下为保留字
_key = ("auto","break","case","char","const","continue","default", "do","double","else","enum","extern","float","for",
"goto","if","int","long","register","return","short", "signed","static","sizeof","struct","switch","typedef","union",
"unsigned","void","volatile","while") # c语言的32个关键字
#以下为运算符
_operator = ("+","-", "*", "/","+=","++","--", "*=","-=","/=","==","&" ,"&&", "|", "||","=","%","%=","!=","!")
#以下为界符
_delimiters = {'[',']',';','(',')',',','{','}','"','\''}
#以下为常用转义字符
_escch = {'\\a','\\b','\\f','\\n','\\r','\\t','\\v','\\"'}
def writeResult(s):
    """Write the items of *s* to a user-chosen file, one item per line.

    The output file name is read interactively from standard input. The file
    is opened for writing as UTF-8, so an existing file is overwritten.

    Args:
        s: An iterable of results; each item is converted with ``str()`` and
            written followed by a newline.
    """
    path = input("请输入输出文件名")
    # The ``with`` block closes the file; the original trailing ``f.close``
    # was a bare attribute reference (never called) and has been dropped.
    with open(path, 'w', encoding="UTF-8") as f:
        for line in s:
            f.write(str(line))
            f.write("\n")
def judge(mystr, row, state):
    """Tokenise one line of C-like source text.

    Args:
        mystr: The characters of the line, either as a list of one-character
            strings or as a plain string. The scanner stops at the first
            newline outside of special handling; a newline is appended to an
            internal copy when the input does not already end with one.
        row: Line number of this line; only used to label 'wrong' entries.
        state: Scanner state carried over from the previous line:
            1  = normal code,
            3  = inside a block comment,
            4  = inside a block comment, just after a '*',
            10 = inside a double-quoted string literal.

    Returns:
        A tuple ``(tokens, state)``. ``tokens`` is a list of lists of the
        form ``[kind, text]`` where kind is one of 'key', 'id', 'integer',
        'float', 'operator', 'delimiter', 'delimiters' (used for the double
        quotes around a string), 'str' or 'escch'; problems are reported as
        ``['wrong', message, '<row>行']``. ``state`` is the state to pass in
        for the next line.

    Reads the module-level tables ``_key``, ``_operator``, ``_delimiters``
    and ``_escch``.
    """
    # Private copy that is guaranteed to end with the newline sentinel the
    # scanner stops on, so empty input or a missing final newline cannot run
    # off the end of the buffer.
    chars = list(mystr)
    if not chars or chars[-1] != '\n':
        chars.append('\n')

    list2 = []
    position = 0
    ch = chars[position]
    _value = ''  # lexeme being accumulated

    def getonech():
        """Append the current character to the lexeme and advance by one."""
        nonlocal _value, ch, position
        _value += ch
        position += 1
        ch = chars[position]

    def judgeoperator(b):
        """Emit a one- or two-character operator starting at the current char.

        *b* lists the characters that may follow the current one to form a
        two-character operator.
        """
        nonlocal _value, position
        getonech()
        if ch in b:
            _value += ch
            list2.append(['operator', _value])
        else:
            list2.append(['operator', chars[position - 1]])
            # Step back so the main loop re-reads the look-ahead character.
            position -= 1
        _value = ''

    while True:
        # A byte-order mark is treated as plain whitespace.
        if ch == '\ufeff':
            ch = ' '

        # ---- string literals ------------------------------------------
        if state == 1 and ch == '"':
            list2.append(['delimiters', '"'])
            state = 10
        elif state == 10:
            if ch == '"':
                state = 1
                list2.append(['str', _value])
                _value = ''
                list2.append(['delimiters', '"'])
                position += 1
                ch = chars[position]
                # Restart the dispatch on the new character so that a quote
                # or comment directly after the closing quote is recognised.
                continue
            elif ch == '\\':
                _tempvalue = ch + chars[position + 1]
                if _tempvalue in _escch:
                    # Flush the text collected so far, then emit the escape.
                    list2.append(['str', _value])
                    _value = ''
                    list2.append(['escch', _tempvalue])
                    position += 1  # skip the escaped character as well
                else:
                    _value += ch
            else:
                _value += ch

        # ---- comments ---------------------------------------------------
        if state == 1 and ch == '/':
            following = chars[position + 1]
            if following == '/':
                # Line comment: behave as if the line ended here.
                ch = '\n'
            elif following == '*':
                # Skip the '*' so that "/*/" is not taken as a full comment.
                position += 1
                state = 3
            # Otherwise it is a plain '/' and is lexed as an operator below.
        elif state == 3:
            if ch == '*':
                state = 4
        elif state == 4:
            if ch == '/':
                state = 1
                position += 1
                ch = chars[position]
                # Restart the dispatch so that a string or another comment
                # right after "*/" is recognised.
                continue
            elif ch != '*':
                state = 3

        # ---- end of line ------------------------------------------------
        if ch == "\n":
            # A string may only continue on the next line when the newline
            # is preceded by a backslash.
            if state == 10 and chars[position - 1] != '\\':
                state = 1
                list2.append(['wrong', '引号未结束', str(row) + '行'])
            return (list2, state)

        # ---- ordinary tokens --------------------------------------------
        if state == 1 and ch != ' ' and ch != '\t':
            if ch.isalpha() or ch == '_':
                # Identifier or keyword.
                while ch.isalpha() or ch.isdigit() or ch == '_':
                    getonech()
                if _value in _key:
                    list2.append(['key', _value])
                else:
                    list2.append(['id', _value])
                _value = ''
                position -= 1
            elif ch.isdigit():
                # Number. The sub-states are local to this branch:
                # 2 leading zero, 3 integer part, 4 fraction,
                # 5 just after 'e'/'E', 6 exponent digits.
                num_state = 2 if ch == '0' else 3
                getonech()
                if num_state == 2:
                    if ch == '.':
                        num_state = 4
                    elif ch.isdigit():
                        list2.append(['wrong', '非零数字不能以0开头', str(row) + "行"])
                        _value = ''
                        position -= 1
                    else:
                        list2.append(['integer', _value])
                        _value = ''
                        position -= 1
                if num_state == 3:
                    while ch.isdigit():
                        getonech()
                    if ch == '.':
                        num_state = 4
                    else:
                        list2.append(['integer', _value])
                        _value = ''
                        position -= 1
                if num_state == 4:
                    getonech()  # consume the '.'
                    while ch.isdigit():
                        getonech()
                    if ch == 'e' or ch == 'E':
                        num_state = 5
                    else:
                        list2.append(['float', _value])
                        _value = ''
                        position -= 1
                if num_state == 5:
                    getonech()  # consume the 'e' / 'E'
                    if ch == '+' or ch == '-' or ch.isdigit():
                        num_state = 6
                    else:
                        list2.append(['wrong', 'e后应为整数', str(row) + '行'])
                        _value = ''
                        # Step back so the offending character is lexed on
                        # the next iteration instead of being skipped.
                        position -= 1
                if num_state == 6:
                    getonech()  # consume the sign or first exponent digit
                    while ch.isdigit():
                        getonech()
                    list2.append(['float', _value])
                    _value = ''
                    position -= 1
            elif ch in _operator:
                if ch == '+' or ch == '-':
                    judgeoperator([ch, '='])
                elif ch == '&' or ch == '|':
                    judgeoperator([ch])
                else:
                    # '*', '/', '=', '%', '!' may only be followed by '='.
                    judgeoperator(['='])
            elif ch in _delimiters:
                # NOTE(review): this label is 'delimiter' while the quotes
                # around strings use 'delimiters'; kept as in the original.
                list2.append(['delimiter', ch])
            elif ch == '\\':
                # Escape sequence outside of a string literal.
                getonech()
                _value += ch
                if _value in _escch:
                    list2.append(['escch', _value])
                    _value = ''
                else:
                    list2.append(['wrong', '不合理的输入\\', str(row) + '行'])
                    _value = ''
                    position -= 1
            else:
                list2.append(['wrong', '接收到不合理的输入' + ch, str(row) + "行"])
                _value = ''

        # Move on to the next character.
        position += 1
        ch = chars[position]
if __name__ == '__main__':
    # Script entry point: ask for a source file, tokenise it line by line
    # and hand the per-line results to writeResult().
    path = input("请输入测试代码文件名")
    list1 = []  # one token list per input line
    state = 1   # scanner state carried from one line to the next
    # ``with`` guarantees the file is closed; the original ``f.close`` was
    # never actually called.
    with open(path, 'r', encoding="UTF-8") as f:
        for row, line in enumerate(f):
            # The extra newline makes sure every line, including a final
            # line without a line break, ends with a newline for judge().
            (list2, state) = judge(list(line + '\n'), row, state)
            list1.append(list2)
    writeResult(list1)