diff options
| -rw-r--r-- | driver.c | 180 | ||||
| -rw-r--r-- | lexer.l | 123 | ||||
| -rw-r--r-- | token.h | 46 |
3 files changed, 349 insertions, 0 deletions
diff --git a/driver.c b/driver.c new file mode 100644 index 0000000..ebee4db --- /dev/null +++ b/driver.c @@ -0,0 +1,180 @@ +#include <stdlib.h> +#include "lex.yy.c" +#include "token.h" + +/* yyline = 1 for numbering lines 1 to n */ +int yyline = 1; +int yycolumn = 0; +int yylength = 0; +int yylval = 0; + +/* the string table is one block separated by null terminators + * an int array stores the index of each string + * access the strings with pointer arithmetic */ +char *stringTable; +int stringTableIndices[LIMIT2]; +int nStringsStored = 0; +int nCharsStored = 0; + +int main(int argc, char *argv[]) +{ + int lexReturn; + int i; + + /* initialize string table */ + stringTable = (char *) malloc(LIMIT1 + 1); + strcpy(stringTable,""); + + printf("Line Column Token Index in String Table\n"); + + /* get tokens and print table */ + do { + + lexReturn = yylex(); + + /* printing line and column here so the switch cases are less wordy */ + if (lexReturn != EOFnum) { + printf("%-8d %-12d", yyline, yycolumn); + } + + switch (lexReturn) { + case ANDnum: + printf("%-8s\n","ANDnum"); + break; + case ASSGNnum: + printf("%-8s\n","ASSGNnum"); + break; + case DECLARATIONSnum: + printf("%-8s\n","DECLARATIONSnum"); + break; + case DOTnum: + printf("%-8s\n","DOTnum"); + break; + case ENDDECLARATIONSnum: + printf("%-8s\n","ENDDECLARATIONSnum"); + break; + case EQUALnum: + printf("%-8s\n","EQUALnum"); + break; + case GTnum: + printf("%-8s\n","GTnum"); + break; + case IDnum: + printf("%-8s %d\n","IDnum", yylval); + break; + case INTnum: + printf("%-8s\n","INTnum"); + break; + case LBRACnum: + printf("%-8s\n","LBRACnum"); + break; + case LPARENnum: + printf("%-8s\n","LPARENnum"); + break; + case METHODnum: + printf("%-8s\n","METHODnum"); + break; + case NEnum: + printf("%-8s\n","NEnum"); + break; + case ORnum: + printf("%-8s\n","ORnum"); + break; + case PROGRAMnum: + printf("%-8s\n","PROGRAMnum"); + break; + case RBRACnum: + printf("%-8s\n","RBRACnum"); + break; + case RPARENnum: + printf("%-8s\n","RPARENnum"); + break; + case SEMInum: + printf("%-8s\n","SEMInum"); + break; + case VALnum: + printf("%-8s\n","VALnum"); + break; + case WHILEnum: + printf("%-8s\n","WHILEnum"); + break; + case CLASSnum: + printf("%-8s\n","CLASSnum"); + break; + case COMMAnum: + printf("%-8s\n","COMMAnum"); + break; + case DIVIDEnum: + printf("%-8s\n","DIVIDEnum"); + break; + case ELSEnum: + printf("%-8s\n","ELSEnum"); + break; + case EQnum: + printf("%-8s\n","EQnum"); + break; + case GEnum: + printf("%-8s\n","GEnum"); + break; + case ICONSTnum: + printf("%-8s\n","ICONSTnum"); + break; + case IFnum: + printf("%-8s\n","IFnum"); + break; + case LBRACEnum: + printf("%-8s\n","LBRACEnum"); + break; + case LEnum: + printf("%-8s\n","LEnum"); + break; + case LTnum: + printf("%-8s\n","LTnum"); + break; + case MINUSnum: + printf("%-8s\n","MINUSnum"); + break; + case NOTnum: + printf("%-8s\n","NOTnum"); + break; + case PLUSnum: + printf("%-8s\n","PLUSnum"); + break; + case RBRACEnum: + printf("%-8s\n","RBRACEnum"); + break; + case RETURNnum: + printf("%-8s\n","RETURNnum"); + break; + case SCONSTnum: + printf("%-8s %d\n", "SCONSTnum", yylval); + break; + case TIMESnum: + printf("%-8s\n","TIMESnum"); + break; + case VOIDnum: + printf("%-8s\n","VOIDnum"); + break; + case EOFnum: + printf("%28s\n","EOFnum"); + break; + default: + printf("%-8s\n","bad token"); + } + + } while (lexReturn != 0); + + /* retrieving string table */ + printf("\nString Table: "); + if (nCharsStored > 0) { + for (i=0;i<nStringsStored;i++) { + printf("%s ",stringTable + stringTableIndices[i]); + } + } + + printf("\n\nEnd of File\n"); + + free(stringTable); + +return 0; +} @@ -0,0 +1,123 @@ +%{ +#include <stdlib.h> +#include "token.h" + +extern int yycolumn, yylength, yyline, yylval; + +extern char *stringTable; +extern int stringTableIndices[]; +extern int nStringsStored; +extern int nCharsStored; + +/* functions */ +void ReportError(char *, int, int); +int getStrTableIndex(char *, int, int); +void RemoveQuotes(char *); + +%} + +/* regular definitions */ + +id ([a-zA-Z])+ +delim [ \t] +ws {delim}+ +newline [\n] + +%% +{newline} {yyline++; yycolumn=0;} +{ws} {yycolumn+=yyleng;} +"/*"([^*]|[ \t\n]|(\*+([^*/]|[ \t\n])))* {ReportError("comment doesn't terminate",yyline,yycolumn);} +"/*"([^*]|[ \t\n]|(\*+([^*/]|[ \t\n])))*"*"+"/" {yycolumn+=yyleng;}; + +"&&" {yycolumn+=yyleng; return (ANDnum);} +":=" {yycolumn+=yyleng; return (ASSGNnum);} +[dD][eE][cC][lL][aA][rR][aA][tT][iI][oO][nN][sS] {yycolumn+=yyleng; return (DECLARATIONSnum);} +"." {yycolumn+=yyleng; return (DOTnum);} +[eE][nN][dD][dD][eE][cC][lL][aA][rR][aA][tT][iI][oO][nN][sS] {yycolumn+=yyleng; return (ENDDECLARATIONSnum);} +"=" {yycolumn+=yyleng; return (EQUALnum);} +">" {yycolumn+=yyleng; return (GTnum);} +[iI][nN][tT] {yycolumn+=yyleng; return (INTnum);} +"[" {yycolumn+=yyleng; return (LBRACnum);} +"(" {yycolumn+=yyleng; return (LPARENnum);} +[mM][eE][tT][hH][oO][dD] {yycolumn+=yyleng; return (METHODnum);} +"!=" {yycolumn+=yyleng; return (NEnum);} +"||" {yycolumn+=yyleng; return (ORnum);} +[pP][rR][oO][gG][rR][aA][mM] {yycolumn+=yyleng; return (PROGRAMnum);} +"]" {yycolumn+=yyleng; return (RBRACnum);} +")" {yycolumn+=yyleng; return (RPARENnum);} +";" {yycolumn+=yyleng; return (SEMInum);} +[vV][aA][lL] {yycolumn+=yyleng; return (VALnum);} +[wW][hH][iI][lL][eE] {yycolumn+=yyleng; return (WHILEnum);} +[cC][lL][aA][sS][sS] {yycolumn+=yyleng; return (CLASSnum);} +"," {yycolumn+=yyleng; return (COMMAnum);} +"/" {yycolumn+=yyleng; return (DIVIDEnum);} +[eE][lL][sS][eE] {yycolumn+=yyleng; return (ELSEnum);} +"==" {yycolumn+=yyleng; return (EQnum);} +">=" {yycolumn+=yyleng; return (GEnum);} +[iI][fF] {yycolumn+=yyleng; return (IFnum);} +"{" {yycolumn+=yyleng; return (LBRACEnum);} +"<=" {yycolumn+=yyleng; return (LEnum);} +"<" {yycolumn+=yyleng; return (LTnum);} +"-" {yycolumn+=yyleng; return (MINUSnum);} +"!" {yycolumn+=yyleng; return (NOTnum);} +"+" {yycolumn+=yyleng; return (PLUSnum);} +"}" {yycolumn+=yyleng; return (RBRACEnum);} +[rR][eE][tT][uU][rR][nN] {yycolumn+=yyleng; return (RETURNnum);} +"*" {yycolumn+=yyleng; return (TIMESnum);} +[vV][oO][iI][dD] {yycolumn+=yyleng; return (VOIDnum);} + +'(.|[ \t])*' {RemoveQuotes(yytext); yylval = getStrTableIndex(yytext,yyline,yycolumn); yycolumn+=yyleng; return (SCONSTnum);} +'(.|[ \t\n])*' {ReportError("string constant crosses line boundary",yyline,yycolumn); yyline++; yycolumn=0;} +[1-9][0-9]* {yylval = atoi(yytext); yycolumn+=yyleng; return (ICONSTnum);} +[0-9][a-zA-Z0-9]* {ReportError("identifier starting with a digit",yyline,yycolumn); yycolumn+=yyleng;} +[a-zA-Z][a-zA-Z0-9]* {yylval = getStrTableIndex(yytext,yyline,yycolumn); yycolumn+=yyleng; return (IDnum);} + + +%% + +void ReportError(char *message,int line, int column) { + fprintf( stderr, "\nError at line %d, column %d (first char): %s\n\n", line, column, message); + return; +} + +int getStrTableIndex(char *string, int line, int column) { + int i; + + /* use index array to search table for string*/ + for (i=0; i < nStringsStored; i++) { + /* return index if found */ + if (strcmp(string, stringTable + stringTableIndices[i]) == 0) { + return stringTableIndices[i]; + } + } + + /* error if string table is full */ + if (nStringsStored == LIMIT1) { + ReportError("can't add string to string table: max character count.",line, column); + return -1; + } + + /* error if index table is full */ + if (nStringsStored == LIMIT2) { + ReportError("can't add string to string table: max number of unique strings",line, column); + return -1; + } + + /* add string to table if not full and return index */ + stringTableIndices[i] = nCharsStored; + memcpy(stringTable + stringTableIndices[i],string,strlen(string)+1); + nCharsStored+=strlen(string)+1; + nStringsStored++; + + return stringTableIndices[i]; +} + +/* this lexer detects string constants by their quotes, so yytext includes them + * even though they aren't really part of the string + * so this removes them */ +void RemoveQuotes(char *string) { + memmove(string, string+1, strlen(string)); + string[strlen(string) - 1] = '\0'; + + return; +} @@ -0,0 +1,46 @@ +/* limit on total characters */ +#define LIMIT1 5000 + +/* limit on number of unique strings */ +#define LIMIT2 1000 + +#define ANDnum 257 +#define ASSGNnum 258 +#define DECLARATIONSnum 259 +#define DOTnum 260 +#define ENDDECLARATIONSnum 261 +#define EQUALnum 262 +#define GTnum 263 +#define IDnum 264 +#define INTnum 265 +#define LBRACnum 266 +#define LPARENnum 267 +#define METHODnum 268 +#define NEnum 269 +#define ORnum 270 +#define PROGRAMnum 271 +#define RBRACnum 272 +#define RPARENnum 273 +#define SEMInum 274 +#define VALnum 275 +#define WHILEnum 276 +#define CLASSnum 277 +#define COMMAnum 278 +#define DIVIDEnum 279 +#define ELSEnum 280 +#define EQnum 281 +#define GEnum 282 +#define ICONSTnum 283 +#define IFnum 284 +#define LBRACEnum 285 +#define LEnum 286 +#define LTnum 287 +#define MINUSnum 288 +#define NOTnum 289 +#define PLUSnum 290 +#define RBRACEnum 291 +#define RETURNnum 292 +#define SCONSTnum 293 +#define TIMESnum 294 +#define VOIDnum 295 +#define EOFnum 0 |
