duckflew
duckflew
Published on 2021-04-23 / 193 Visits
0
0

C++实现一个简单的PL0语言词法分析器

#include<bits/stdc++.h>
using namespace std;
// Upper index bound for the keyword tables: keyWords[] and keyWordSym[] are
// declared with keyWordNum+1 slots, and slot 0 is a placeholder.
#define keyWordNum 17
// Not referenced anywhere in the visible source.
#define SIGN_NUM 10
// Maximum number of characters getSym() stores for one identifier.
#define IdentMaxLen 20
// Number of single-character operators listed in operators[]
// (the arrays are declared with one extra slot).
#define operatorSymNum 7
// Index of the last entry in delemiters[]; the table holds delimiterNum+1 characters.
#define delimiterNum 4


/**
 * One lexical token produced by the scanner (token type).
 *
 * All members have defined defaults so that a token which only sets some
 * fields never carries indeterminate values into the result list.
 */
struct  node
{
    int  sym = 0;      // token category code (a symType value); 0 corresponds to SYM_NULL
    std::string id;    // lexeme text of the token; empty by default
    int num = 0;       // numeric value, set for number tokens
};
// Category codes (the symType enum follows below).
// Global token list: getSym() appends the tokens it recognises here.
vector<node> results;
// Token category codes. The numeric values are implicit and follow the
// declaration order starting at 0, so the order must not be changed.
enum symType
{
    SYM_NULL,        // no / unknown category
    SYM_IDENTIFIER,  // user-defined name
    SYM_NUMBER,      // integer literal
    SYM_PLUS,        // '+'
    SYM_MINUS,       // '-'
    SYM_TIMES,       // '*'
    SYM_SLASH,       // '/'
    SYM_ODD,         // keyword "odd"
    SYM_EQU,         // '='
    SYM_NEQ,         // not-equal (by name); no lexeme for it appears in the lookup tables
    SYM_LES,         // '<'
    SYM_LEQ,         // less-or-equal (by name)
    SYM_GTR,         // '>'
    SYM_GEQ,         // ">="
    SYM_LPAREN,      // '('
    SYM_RPAREN,      // ')'
    SYM_COMMA,       // ','
    SYM_SEMICOLON,   // ';'
    SYM_PERIOD,      // '.'
    SYM_BECOMES,     // ":="
    SYM_BEGIN,       // keyword "begin"
    SYM_END,         // keyword "end"
    SYM_IF,          // keyword "if"
    SYM_THEN,        // keyword "then"
    SYM_WHILE,       // keyword "while"
    SYM_DO,          // keyword "do"
    SYM_CALL,        // keyword "call"
    SYM_CONST,       // keyword "const"
    SYM_VAR,         // keyword "var"
    SYM_PROCEDURE,   // keyword "procedure"
    SYM_LILIANG,     // keyword "LiLiang"
    SYM_NAME,        // keyword "Name"
    SYM_STUNUMBER,   // keyword "StuNumber"
    SYM_READ,        // keyword "read"
    SYM_WRITE        // keyword "write" (by name)
};

// Single-character operator lexemes. The array is declared with
// operatorSymNum+1 slots but only operatorSymNum initialisers, so the final
// slot is zero-initialised padding.
char operators[operatorSymNum + 1] = {
    '+',
    '-',
    '*',
    '/',
    '=',
    '>',
    '<'
};
// Category code for the operator stored at the same index in operators[].
symType operatorsSym[operatorSymNum + 1] = {
    SYM_PLUS,
    SYM_MINUS,
    SYM_TIMES,
    SYM_SLASH,
    SYM_EQU,
    SYM_GTR,
    SYM_LES
};
// Delimiter characters; all delimiterNum+1 slots are populated.
char delemiters[delimiterNum + 1] = {
    '(',
    ')',
    '.',
    ';',
    ','
};
// Category code for the delimiter stored at the same index in delemiters[].
symType delemiterSym[delimiterNum + 1] = {
    SYM_LPAREN,
    SYM_RPAREN,
    SYM_PERIOD,
    SYM_SEMICOLON,
    SYM_COMMA
};
// Reserved words. Slot 0 is an empty placeholder (lookups in getSym() start
// at index 1); slots without an initialiser stay empty strings.
string  keyWords[keyWordNum+1]=
    {
        "","begin","call","const","do","end","if","odd","procedure","then","var","while","LiLiang","StuNumber","Name","read","write"
    };
// Category code for the reserved word at the same index in keyWords[].
// The last entry pairs "write" with SYM_WRITE (it was SYM_WHILE, which made
// "write" indistinguishable from "while").
symType keyWordSym[keyWordNum+1]=
{
    SYM_NULL,SYM_BEGIN,SYM_CALL,SYM_CONST,SYM_DO,SYM_END,SYM_IF,SYM_ODD,SYM_PROCEDURE,SYM_THEN,SYM_VAR,SYM_WHILE,SYM_LILIANG,SYM_STUNUMBER,SYM_NAME,
    SYM_READ,SYM_WRITE
};
// Input stream the scanner reads from; assigned by init().
FILE* f;
/**
 * Opens "code.txt" for reading and stores the stream in the global `f`.
 *
 * If the file cannot be opened, `f` is left NULL and the reason is reported
 * on stderr so the failure is visible instead of surfacing later as a crash.
 */
void init()
{
    f=fopen("code.txt","r");
    if(f==NULL)
    {
        perror("code.txt");
    }
}
/**
 * Tells whether `ch` is one of the delimiter characters in delemiters[].
 *
 * @param ch character to classify
 * @return true when `ch` equals any of the delimiterNum+1 table entries
 */
bool isDelemiterChar(char ch)
{
    const char* const tableBegin = delemiters;
    const char* const tableEnd = delemiters + delimiterNum + 1;
    return std::find(tableBegin, tableEnd, ch) != tableEnd;
}
/**
 * Tells whether `ch` can start an operator token.
 *
 * @param ch character to classify
 * @return true for ':' (first character of ":=") and for every character
 *         listed in operators[]; false otherwise
 */
bool isOperatorChar(char ch)
{
    // ':' is not in operators[] because it is only valid as part of ":=",
    // but the scanner's operator branch is where ":=" is recognised.
    if(ch==':')return true;

    // Only the first operatorSymNum slots hold operators; the extra slot is
    // zero padding and must not be compared, or a NUL byte would match.
    for(int i=0;i<operatorSymNum;i++)
    {
        if(ch==operators[i])return true;
    }
    return false;
}
void getSym()
{
    char ch=fgetc(f);
    while(ch!=EOF)
    {
       while (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')
		{
			ch = fgetc(f);
		}
        if(isalpha(ch))//如果输入是字母   则应该是关键字或者字符
        {
            char a[IdentMaxLen];
            int k=0;
            for (;(isalpha(ch)||isdigit(ch))&&k<IdentMaxLen; k++)
            {
                /* code */
                a[k]=ch;
                ch=fgetc(f);
            }
            a[k]='\0';
            string id=a;
            int i=1;
            for (; i <=keyWordNum; i++)
            {
                if(keyWords[i]==id)break;    /* code */
            }
            if(i<=keyWordNum)
            {
                /**
                 * 保留字
                 */
                node reservedWord;
                reservedWord.id=id;
                reservedWord.sym=keyWordSym[i];
                results.push_back(reservedWord);
                cout<<" "<<id<<"   保留字"<<endl;
            }
            else{
                //不是保留字
                 node identifier;
                identifier.id=id;
                identifier.sym=SYM_IDENTIFIER;
                results.push_back(identifier);
                cout<<" "<<id<<"   标识符"<<endl;
            }
        }
        else if(isdigit(ch)){
            node numWord;
            numWord.sym=SYM_NUMBER;
            int k=0;
            int num=0;
            char numChars[20];
            while (isdigit(ch))
            {
                /* code */
                numChars[k]=ch;
                num=num*10+ch-'0';
                ch=fgetc(f);
                k++;
            }
            numChars[k]='\0';
            numWord.num=num;
            results.push_back(numWord);
            cout<<" "<<string(numChars)<<"    数字"<<endl;
        }
        else if(isOperatorChar(ch))
        {
            if(ch==':')
            {
                ch=fgetc(f);
                if(ch=='=')
                {
                    node becomes;
                    becomes.sym=SYM_BECOMES;
                    becomes.id=":=";
                    results.push_back(becomes);
                    cout<<" "<<":=    运算符"<<endl;
                     ch=fgetc(f);
                }
                else
                {
                    node illeagal;
                    illeagal.sym=SYM_NULL;
                    illeagal.id=":";
                    cout<<" "<<illeagal.id<<"   非法"<<endl;
                }
            }
            else if(ch=='>')
            {
                ch=fgetc(f);
                if(ch=='=')
                {
                    node greaterEqual;
                    greaterEqual.sym=SYM_GEQ;
                    greaterEqual.id=">=";
                    results.push_back(greaterEqual);
                    cout<<" "<<">=    运算符"<<endl;
                     ch=fgetc(f);
                }
                else
                {
                   node equalOperator;
                   equalOperator.id="=";
                   equalOperator.sym=SYM_EQU;
                   results.push_back(equalOperator);
                   cout<<" "<<"=    运算符"<<endl;
                }
            }
            else if(ch=='<')
            {
                ch=fgetc(f);
                if(ch=='=')
                {
                    node lessEqual;
                    lessEqual.sym=SYM_GEQ;
                    lessEqual.id="<=";
                    results.push_back(lessEqual);
                    cout<<"<=    运算符"<<endl;
                    results.push_back(lessEqual);
                     ch=fgetc(f);
                }
                else
                {
                   node lessOperator;
                   lessOperator.id="=";
                   lessOperator.sym=SYM_LES;
                   results.push_back(lessOperator);
                   cout<<" "<<"<   运算符"<<endl;
                }
            }
            else
            {
                node operatorNode;//单字符运算符
                operatorNode.id=string(ch,1);
            for(int i=0;i<operatorSymNum+1;i++)
            {
                if(ch==delemiters[i])operatorNode.sym=operatorsSym[i];
            }
            cout<<" "<<ch<<"    运算符"<<endl;
            results.push_back(operatorNode);
             ch=fgetc(f);
            }
       
        }
        else if(isDelemiterChar(ch))
        {
            node delimiter;//界符
            delimiter.id=string(ch,1);
            for(int i=0;i<delimiterNum+1;i++)
            {
                if(ch==delemiters[i])delimiter.sym=delemiterSym[i];
            }
            cout<<" "<<ch<<"    界符"<<endl;
            results.push_back(delimiter);
             ch=fgetc(f);
        }
        else 
        {
            cout<<" "<<ch<<"  非法字符"<<endl;
            ch=fgetc(f);
        }
    }
}
int main()
{
    init();
    getSym();
    system("pause");
    return 0;
}

Comment