aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSarah Elizabeth Higbee <Madchen@fq-130-49-43-231.wireless.pitt.edu>2017-03-14 12:28:34 -0400
committerSarah Elizabeth Higbee <Madchen@fq-130-49-43-231.wireless.pitt.edu>2017-03-14 12:28:34 -0400
commit1bb37b1f9f351100ff628cbfb56f6cf52925a2b3 (patch)
tree457884bdd2814304f1295cd23df7b912c790b495
parent605fb8c0af874cb00b8b3060063ab297ce4410fb (diff)
downloadcoe1622-1bb37b1f9f351100ff628cbfb56f6cf52925a2b3.tar.gz
coe1622-1bb37b1f9f351100ff628cbfb56f6cf52925a2b3.tar.bz2
coe1622-1bb37b1f9f351100ff628cbfb56f6cf52925a2b3.zip
mah lexerHEADmaster
-rw-r--r--driver.c180
-rw-r--r--lexer.l123
-rw-r--r--token.h46
3 files changed, 349 insertions, 0 deletions
diff --git a/driver.c b/driver.c
new file mode 100644
index 0000000..ebee4db
--- /dev/null
+++ b/driver.c
@@ -0,0 +1,180 @@
+#include <stdlib.h>
+#include "lex.yy.c"
+#include "token.h"
+
+/* scanner position and token value shared with lexer.l; yyline = 1 for numbering lines 1 to n */
+int yyline = 1;
+int yycolumn = 0;
+int yylength = 0; /* declared extern in lexer.l but not read or written by code in this patch */
+int yylval = 0; /* string table offset for IDnum/SCONSTnum, numeric value for ICONSTnum */
+
+/* the string table is one malloc'd block of null-terminated strings stored back to back;
+ * stringTableIndices[k] holds the offset of the k-th string inside that block,
+ * so a string is read with pointer arithmetic: stringTable + stringTableIndices[k] */
+char *stringTable;
+int stringTableIndices[LIMIT2];
+int nStringsStored = 0;
+int nCharsStored = 0;
+
+/* Driver: calls yylex() until it returns EOFnum and prints one table row per token
+ * (line, column, token name, plus the string table index for IDnum and SCONSTnum),
+ * then prints the string table. Returns 0, or EXIT_FAILURE if the table cannot be allocated. */
+int main(int argc, char *argv[])
+{
+    int lexReturn;
+    int i;
+
+    /* initialize string table: one block of LIMIT1 + 1 bytes that starts out as "" */
+    stringTable = malloc(LIMIT1 + 1);
+    if (stringTable == NULL) {
+        fprintf(stderr, "out of memory: cannot allocate string table\n");
+        return EXIT_FAILURE;
+    }
+    stringTable[0] = '\0';
+
+    printf("Line Column Token Index in String Table\n");
+
+    /* get tokens and print table */
+    do {
+        lexReturn = yylex();
+        /* printing line and column here so the switch cases are less wordy */
+        if (lexReturn != EOFnum) {
+            printf("%-8d %-12d", yyline, yycolumn);
+        }
+        switch (lexReturn) {
+        case ANDnum:
+            printf("%-8s\n","ANDnum");
+            break;
+        case ASSGNnum:
+            printf("%-8s\n","ASSGNnum");
+            break;
+        case DECLARATIONSnum:
+            printf("%-8s\n","DECLARATIONSnum");
+            break;
+        case DOTnum:
+            printf("%-8s\n","DOTnum");
+            break;
+        case ENDDECLARATIONSnum:
+            printf("%-8s\n","ENDDECLARATIONSnum");
+            break;
+        case EQUALnum:
+            printf("%-8s\n","EQUALnum");
+            break;
+        case GTnum:
+            printf("%-8s\n","GTnum");
+            break;
+        case IDnum:
+            printf("%-8s %d\n","IDnum", yylval);
+            break;
+        case INTnum:
+            printf("%-8s\n","INTnum");
+            break;
+        case LBRACnum:
+            printf("%-8s\n","LBRACnum");
+            break;
+        case LPARENnum:
+            printf("%-8s\n","LPARENnum");
+            break;
+        case METHODnum:
+            printf("%-8s\n","METHODnum");
+            break;
+        case NEnum:
+            printf("%-8s\n","NEnum");
+            break;
+        case ORnum:
+            printf("%-8s\n","ORnum");
+            break;
+        case PROGRAMnum:
+            printf("%-8s\n","PROGRAMnum");
+            break;
+        case RBRACnum:
+            printf("%-8s\n","RBRACnum");
+            break;
+        case RPARENnum:
+            printf("%-8s\n","RPARENnum");
+            break;
+        case SEMInum:
+            printf("%-8s\n","SEMInum");
+            break;
+        case VALnum:
+            printf("%-8s\n","VALnum");
+            break;
+        case WHILEnum:
+            printf("%-8s\n","WHILEnum");
+            break;
+        case CLASSnum:
+            printf("%-8s\n","CLASSnum");
+            break;
+        case COMMAnum:
+            printf("%-8s\n","COMMAnum");
+            break;
+        case DIVIDEnum:
+            printf("%-8s\n","DIVIDEnum");
+            break;
+        case ELSEnum:
+            printf("%-8s\n","ELSEnum");
+            break;
+        case EQnum:
+            printf("%-8s\n","EQnum");
+            break;
+        case GEnum:
+            printf("%-8s\n","GEnum");
+            break;
+        case ICONSTnum:
+            printf("%-8s\n","ICONSTnum");
+            break;
+        case IFnum:
+            printf("%-8s\n","IFnum");
+            break;
+        case LBRACEnum:
+            printf("%-8s\n","LBRACEnum");
+            break;
+        case LEnum:
+            printf("%-8s\n","LEnum");
+            break;
+        case LTnum:
+            printf("%-8s\n","LTnum");
+            break;
+        case MINUSnum:
+            printf("%-8s\n","MINUSnum");
+            break;
+        case NOTnum:
+            printf("%-8s\n","NOTnum");
+            break;
+        case PLUSnum:
+            printf("%-8s\n","PLUSnum");
+            break;
+        case RBRACEnum:
+            printf("%-8s\n","RBRACEnum");
+            break;
+        case RETURNnum:
+            printf("%-8s\n","RETURNnum");
+            break;
+        case SCONSTnum:
+            printf("%-8s %d\n", "SCONSTnum", yylval);
+            break;
+        case TIMESnum:
+            printf("%-8s\n","TIMESnum");
+            break;
+        case VOIDnum:
+            printf("%-8s\n","VOIDnum");
+            break;
+        case EOFnum:
+            printf("%28s\n","EOFnum");
+            break;
+        default:
+            printf("%-8s\n","bad token");
+            break;
+        }
+    } while (lexReturn != EOFnum);
+
+    /* retrieving string table: stringTableIndices[i] is the offset of the i-th stored string */
+    printf("\nString Table: ");
+    for (i = 0; i < nStringsStored; i++) {
+        printf("%s ", stringTable + stringTableIndices[i]);
+    }
+
+    printf("\n\nEnd of File\n");
+    free(stringTable);
+    return 0;
+}
diff --git a/lexer.l b/lexer.l
new file mode 100644
index 0000000..dc499e6
--- /dev/null
+++ b/lexer.l
@@ -0,0 +1,123 @@
+%{
+#include <stdlib.h>
+#include "token.h"
+
+extern int yycolumn, yylength, yyline, yylval; /* scanner position and token value, defined in driver.c */
+/* string table storage, also defined in driver.c */
+extern char *stringTable;
+extern int stringTableIndices[];
+extern int nStringsStored;
+extern int nCharsStored;
+
+/* helper functions, defined in the user code section after the rules */
+void ReportError(char *, int, int);
+int getStrTableIndex(char *, int, int);
+void RemoveQuotes(char *);
+
+%}
+
+/* regular definitions: named patterns that the rules section expands as {name} */
+/* the rules use {newline} and {ws} (built from {delim}); no rule references {id} */
+id ([a-zA-Z])+
+delim [ \t]
+ws {delim}+
+newline [\n]
+
+%%
+{newline} {yyline++; yycolumn=0;}
+{ws} {yycolumn+=yyleng;}
+"/*"([^*]|[ \t\n]|(\*+([^*/]|[ \t\n])))* {int k; ReportError("comment doesn't terminate",yyline,yycolumn); for (k=0; k<yyleng; k++) { if (yytext[k]=='\n') {yyline++; yycolumn=0;} else {yycolumn++;} }}
+"/*"([^*]|[ \t\n]|(\*+([^*/]|[ \t\n])))*"*"+"/" {int k; for (k=0; k<yyleng; k++) { if (yytext[k]=='\n') {yyline++; yycolumn=0;} else {yycolumn++;} }}
+    /* comments above walk yytext so that newlines inside them keep yyline/yycolumn correct */
+"&&" {yycolumn+=yyleng; return (ANDnum);}
+":=" {yycolumn+=yyleng; return (ASSGNnum);}
+[dD][eE][cC][lL][aA][rR][aA][tT][iI][oO][nN][sS] {yycolumn+=yyleng; return (DECLARATIONSnum);}
+"." {yycolumn+=yyleng; return (DOTnum);}
+[eE][nN][dD][dD][eE][cC][lL][aA][rR][aA][tT][iI][oO][nN][sS] {yycolumn+=yyleng; return (ENDDECLARATIONSnum);}
+"=" {yycolumn+=yyleng; return (EQUALnum);}
+">" {yycolumn+=yyleng; return (GTnum);}
+[iI][nN][tT] {yycolumn+=yyleng; return (INTnum);}
+"[" {yycolumn+=yyleng; return (LBRACnum);}
+"(" {yycolumn+=yyleng; return (LPARENnum);}
+[mM][eE][tT][hH][oO][dD] {yycolumn+=yyleng; return (METHODnum);}
+"!=" {yycolumn+=yyleng; return (NEnum);}
+"||" {yycolumn+=yyleng; return (ORnum);}
+[pP][rR][oO][gG][rR][aA][mM] {yycolumn+=yyleng; return (PROGRAMnum);}
+"]" {yycolumn+=yyleng; return (RBRACnum);}
+")" {yycolumn+=yyleng; return (RPARENnum);}
+";" {yycolumn+=yyleng; return (SEMInum);}
+[vV][aA][lL] {yycolumn+=yyleng; return (VALnum);}
+[wW][hH][iI][lL][eE] {yycolumn+=yyleng; return (WHILEnum);}
+[cC][lL][aA][sS][sS] {yycolumn+=yyleng; return (CLASSnum);}
+"," {yycolumn+=yyleng; return (COMMAnum);}
+"/" {yycolumn+=yyleng; return (DIVIDEnum);}
+[eE][lL][sS][eE] {yycolumn+=yyleng; return (ELSEnum);}
+"==" {yycolumn+=yyleng; return (EQnum);}
+">=" {yycolumn+=yyleng; return (GEnum);}
+[iI][fF] {yycolumn+=yyleng; return (IFnum);}
+"{" {yycolumn+=yyleng; return (LBRACEnum);}
+"<=" {yycolumn+=yyleng; return (LEnum);}
+"<" {yycolumn+=yyleng; return (LTnum);}
+"-" {yycolumn+=yyleng; return (MINUSnum);}
+"!" {yycolumn+=yyleng; return (NOTnum);}
+"+" {yycolumn+=yyleng; return (PLUSnum);}
+"}" {yycolumn+=yyleng; return (RBRACEnum);}
+[rR][eE][tT][uU][rR][nN] {yycolumn+=yyleng; return (RETURNnum);}
+"*" {yycolumn+=yyleng; return (TIMESnum);}
+[vV][oO][iI][dD] {yycolumn+=yyleng; return (VOIDnum);}
+    /* a string stops at its first unescaped quote; one still open at end of line is an error */
+'([^'\\\n]|\\.)*' {RemoveQuotes(yytext); yylval = getStrTableIndex(yytext,yyline,yycolumn); yycolumn+=yyleng; return (SCONSTnum);}
+'([^'\\\n]|\\.)*\n {ReportError("string constant crosses line boundary",yyline,yycolumn); yyline++; yycolumn=0;}
+[0-9]+ {yylval = atoi(yytext); yycolumn+=yyleng; return (ICONSTnum);}
+[0-9][a-zA-Z0-9]* {ReportError("identifier starting with a digit",yyline,yycolumn); yycolumn+=yyleng;}
+[a-zA-Z][a-zA-Z0-9]* {yylval = getStrTableIndex(yytext,yyline,yycolumn); yycolumn+=yyleng; return (IDnum);}
+. {ReportError("illegal character",yyline,yycolumn); yycolumn+=yyleng;}
+
+%%
+
+/* Write a diagnostic for the given line and column to stderr, set off by blank lines. */
+void ReportError(char *message,int line, int column) {
+    fprintf(stderr, "\nError at line %d, column %d (first char): %s\n\n", line, column, message);
+}
+
+/* Return the offset of `string` inside stringTable, storing it first if it is not there yet.
+ * `line` and `column` are only used to locate error reports.
+ * Returns -1 (after calling ReportError) when the character block or the index array is full. */
+int getStrTableIndex(char *string, int line, int column) {
+    size_t len = strlen(string);
+    int i;
+    /* use index array to search table for string; return its offset if found */
+    for (i = 0; i < nStringsStored; i++) {
+        if (strcmp(string, stringTable + stringTableIndices[i]) == 0) {
+            return stringTableIndices[i];
+        }
+    }
+
+    /* the block holds LIMIT1 + 1 bytes: refuse a string whose characters plus terminator won't fit */
+    if ((size_t) nCharsStored + len > (size_t) LIMIT1) {
+        ReportError("can't add string to string table: max character count.",line, column);
+        return -1;
+    }
+
+    /* error if index table is full */
+    if (nStringsStored >= LIMIT2) {
+        ReportError("can't add string to string table: max number of unique strings",line, column);
+        return -1;
+    }
+    /* append the string with its terminator and record its offset; i == nStringsStored here */
+    stringTableIndices[i] = nCharsStored;
+    memcpy(stringTable + nCharsStored, string, len + 1);
+    nCharsStored += (int) (len + 1);
+    nStringsStored++;
+    return stringTableIndices[i];
+}
+
+/* this lexer detects string constants by their quotes, so yytext includes them
+ * even though they aren't really part of the string; this drops the first and last
+ * character in place. A string shorter than two characters becomes the empty string. */
+void RemoveQuotes(char *string) {
+    size_t len = strlen(string);
+    if (len < 2) { string[0] = '\0'; return; }
+    memmove(string, string + 1, len - 2);
+    string[len - 2] = '\0';
+}
diff --git a/token.h b/token.h
new file mode 100644
index 0000000..20bfa3f
--- /dev/null
+++ b/token.h
@@ -0,0 +1,46 @@
+/* string table limits and the token codes returned by yylex(); guarded against double inclusion */
+#ifndef TOKEN_H
+#define TOKEN_H
+#define LIMIT1 5000 /* limit on total characters */
+#define LIMIT2 1000 /* limit on number of unique strings */
+#define ANDnum 257
+#define ASSGNnum 258
+#define DECLARATIONSnum 259
+#define DOTnum 260
+#define ENDDECLARATIONSnum 261
+#define EQUALnum 262
+#define GTnum 263
+#define IDnum 264
+#define INTnum 265
+#define LBRACnum 266
+#define LPARENnum 267
+#define METHODnum 268
+#define NEnum 269
+#define ORnum 270
+#define PROGRAMnum 271
+#define RBRACnum 272
+#define RPARENnum 273
+#define SEMInum 274
+#define VALnum 275
+#define WHILEnum 276
+#define CLASSnum 277
+#define COMMAnum 278
+#define DIVIDEnum 279
+#define ELSEnum 280
+#define EQnum 281
+#define GEnum 282
+#define ICONSTnum 283
+#define IFnum 284
+#define LBRACEnum 285
+#define LEnum 286
+#define LTnum 287
+#define MINUSnum 288
+#define NOTnum 289
+#define PLUSnum 290
+#define RBRACEnum 291
+#define RETURNnum 292
+#define SCONSTnum 293
+#define TIMESnum 294
+#define VOIDnum 295
+#define EOFnum 0
+#endif