/* Reading and Scanning Tokens in C */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Token classes: the category stored in the class field of a token table
   entry. The numeric values are spelled out because they are stored as
   plain ints. */
enum {
    KEYWORD = 0,
    IDENTIFIER = 1,
    INTEGER = 2,
    REAL = 3,
    SPECIAL = 4
};
/* One entry of the token table: the text of a token together with its class
and an occurrence counter. */
typedef struct
{
char token[128]; /* token text as a NUL-terminated string (room for 127 characters) */
int class; /* token class: KEYWORD, IDENTIFIER, INTEGER, REAL or SPECIAL */
int count; /* number of times the token appears; scanner() leaves it untouched */
}token_t;
token_t token_table[256]; /* token table, filled by scanner() in the order the tokens are found */
int n_tokens; /* number of entries of token_table in use; scanner() resets it to 0 */
/* Tell whether c is one of the decimal digits '0' through '9'.
   Returns 1 for a digit and 0 for any other character. */
int is_digit(char c)
{
    if (c < '0' || c > '9') {
        return 0;
    }
    return 1;
}
/* Tell whether c is a letter, i.e. lies in 'a'..'z' or in 'A'..'Z'.
   Returns 1 for a letter and 0 for any other character. */
int is_character(char c)
{
    int is_lower = (c >= 'a' && c <= 'z');
    int is_upper = (c >= 'A' && c <= 'Z');

    return is_lower || is_upper;
}
/* Tell whether c is one of the special symbols ( ) [ ] + - = , ;
   Returns 1 for a special symbol and 0 for any other character. */
int is_special(char c)
{
    switch (c) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '+':
    case '-':
    case '=':
    case ',':
    case ';':
        return 1;
    default:
        return 0;
    }
}
/* Tell whether the string identifier is exactly one of the keywords
   IF, THEN, ELSE, BEGIN or END (the comparison is case-sensitive).
   Returns 1 for a keyword and 0 otherwise. */
int is_keyword(char *identifier)
{
    static const char *const keywords[] = {"IF", "THEN", "ELSE", "BEGIN", "END"};
    size_t k;

    for (k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
        if (strcmp(identifier, keywords[k]) == 0) {
            return 1;
        }
    }
    return 0;
}
/* scanner is the function that goes through all character in the file and builds
a table of tokens */
void scanner(FILE *input)
{
int c;
int state;
char token[128];
int pos;
enum {START,IDENTIFIER,INTEGER,REAL}; /* indicate the state we are in */
state=START;
n_tokens=0;
while(!feof(input))
{
c=getc(input); /* read a char from the file */
switch(state)
{
case START:
pos=0;
if(is_digit(c))
{
state=INTEGER;
token[pos++]=c;
}
else if(is_character(c))
{
state=IDENTIFIER;
token[pos++]=toupper(c);
}
else if(is_special(c))
{
token_table[n_tokens].token[0]=c; /* copy symbol character to table */
token_table[n_tokens].token[1]='\0'; /* add an end of string */
token_table[n_tokens].class=SPECIAL;
n_tokens++; /* increment number of tokens in table */
}
break;
case INTEGER:
if(is_digit(c))
{
state=INTEGER;
token[pos++]=c;
}
else
{
if(c=='.')
{
token[pos++]=c; /* save point in token */
state=REAL;
}
else
{
token[pos++]='\0'; /* add an end of string */
strcpy(token_table[n_tokens].token,token); /* save token in table */
token_table[n_tokens].class=INTEGER; /* save class of token */
n_tokens++; /* increment number of tokens in table */
state=START;
ungetc(c,input); /* reuse current character */
}
}
break;
case REAL:
if(is_digit(c))
{
state=REAL;
token[pos++]=c;
}
else
{
token[pos++]='\0'; /* add an end of string */
strcpy(token_table[n_tokens].token,token); /* save token in table */
token_table[n_tokens].class=REAL; /* save class of token */
n_tokens++; /* increment number of tokens in table */
state=START;
ungetc(c,input); /* reuse current character */
}
break;
case IDENTIFIER:
if(is_character(c))
{
state=IDENTIFIER;
token[pos++]=toupper(c);
}
else
{
token[pos++]='\0'; /* add an end of string */
strcpy(token_table[n_tokens].token,token); /* save token in table */
if(is_keyword(token))
token_table[n_tokens].class=KEYWORD; /* save class of token */
else
token_table[n_tokens].class=IDENTIFIER; /* save class of token */
n_tokens++; /* increment number of tokens in table */
state=START;
ungetc(c,input); /* reuse current character */
}
break;
}
}
}
int main(int argc,char **argv)
{
FILE *file;
int i,j;
int class_count[5]={0,0,0,0,0}; /* count for each class, initialize to zero */
token_t table[256]; /* token table with unique entries */
int n_unique=0,found;
if(argc!=2)
{
printf("Missing source filename.\n");
printf("Usage:\n\tscanner \n");
exit(1);
}
file=fopen(argv[1],"rt");
if(file==NULL)
{
printf("Error opening the file %s\n",argv[1]);
exit(1);
}
scanner(file); /* scan the file and fill the token table */
fclose(file);
printf("\nToken Table: \n");
printf("\n%-10s %s\n","TOKEN","CLASS");
printf("----------------------\n");
for(i=0; i