foogol: | back to bertnase | |
[ To cfoogol from Volume 42, Issue 88. ]
Subject: v08i088: A (vax) compiler for a tiny ALGOL-like language Newsgroups: mod.sources Approved: mirror!rs Submitted by: seismo!enea!suadb!lindberg (Per Lindberg QZ) Mod.sources: Volume 8, Issue 88 Archive-name: foogol [ You'll have to write your own RTS, and link it in. See the documentation... --r$ ] #! /bin/sh # This is a shell archive. Remove anything before this line, # then unpack it by saving it in a file and typing "sh file". # If all goes well, you will see the message "End of shell archive." # Contents: Makefile foogol.doc foogol.c # Wrapped by rs@mirror PATH=/bin:/usr/bin:/usr/ucb; export PATH echo shar: extracting "'Makefile'" '(53 characters)' if test -f 'Makefile' ; then echo shar: will not over-write existing file "'Makefile'" else sed 's/^X//' >Makefile <<'@//E*O*F Makefile//' Xfoogol: foogol.c X $(CC) $(CFLAGS) -o foogol foogol.c @//E*O*F Makefile// if test 53 -ne "`wc -c <'Makefile'`"; then echo shar: error transmitting "'Makefile'" '(should have been 53 characters)' fi fi # end of overwriting check echo shar: extracting "'foogol.doc'" '(4379 characters)' if test -f 'foogol.doc' ; then echo shar: will not over-write existing file "'foogol.doc'" else sed 's/^X//' >foogol.doc <<'@//E*O*F foogol.doc//' Xfc.doc Last modified: 1986-12-15 X X X The FOOGOL-IV compiler X relese notes and documentation X Per Lindberg, QZ X The mad programmer strikes again! X XNAME X fc - foogol compiler X XSYNOPSIS X fc [ -d ] infile [ outfile ] X XDESCRIPTION X fc compiles a foogol program into VAX/UNIX assembly language. X Default extentions are ".foo" for the source file and ".s" X for the compiled file. In other words, the resulting outfile X is is VAX/UNIX assembly language, and can be assebled and X linked with the vanilla UNIX as and ld programs. X X Options: (There is only one switch so far...) X X -d Sets the debug option, which makes the compiler print X out internal diagnostics. Useful for debugging and X understanding the compiler. X X The foogol object code has to be linked with the RTS (Run-Time X system) and the C library in order to be able to do I/O. X Example: X fc foo X as foo.s -o foo.o X ld /lib/crt0.o rts.o foo.o -o foo -lc X Or (shorter): X fc foo X cc rts.o foo.s -o foo X X The RTS (Run-Time System) should be compiled before it is X linked with the foogol object code. It consists of just three X output functions written in C: X X PRS(s) char *s; { printf("%s",s); } X X PRN(i) int i; { printf("%d",i); } X X PR() { putchar('\n'); } X X The foogol language is basically a very small ALGOL. The X current syntactic elements are: X X PROGRAM ::= begin X [ DECLARATION ; ] X STATEMENT [ ; STATEMENT ]... X end X X DECLARATION ::= integer ID_SEQUENCE X X ID_SEQUENCE ::= IDENTIFIER [ , IDENTIFIER ] X X STATEMENT ::= IO_STATEMENT X ! WHILE_STATEMENT X ! COND_STATEMENT X ! BLOCK X ! ASSIGN_STATEMENT X X BLOCK ::= begin X [ DECLARATION ] X [ ; STATEMENT ]... X end X X IO_STATEMENT ::= prints ( STRING ) X ! printn ( EXPRESSION ) X ! print X X COND_STATEMENT ::= if EXPRESSION then STATEMENT X [ else STATEMENT ] X X WHILE_STATEMENT ::= while EXPRESSION do STATEMENT X X ASSIGN_STATEMENT::= IDENTIFIER := EXPRESSION X X EXPRESSION ::= EXPR1 [ RHS ] X X RHS ::= = EXPR1 X ! # EXPR1 X X SIGNED_TERM ::= + TERM X ! - TERM X X TERM ::= PRIMARY [ * PRIMARY ]... X X PRIMARY ::= IDENTIFIER X ! NUMBER X ! ( EXPRESSION ) X X EXPR1 ::= TERM [ SIGNED_TERM ]... X X IDENTIFIER ::=X X NUMBER ::= X X STRING ::= X X Example program: X X begin X integer n, div, sub, test, testcopy, found, max; X test := 2; max := 10; /* number of primes wanted */ X while n # max do begin X div:= test-1; found:= 0; X while div-1 do begin X testcopy:= test; sub:= 0; X while testcopy do begin X sub:= sub+1; if sub = div then sub:= 0; X testcopy:= testcopy-1 X end; X if sub = 0 then found:= 1; X div:= div-1 X end; X if found = 0 then begin X n:= n+1; X printn(test); prints(" is prime number "); printn(n); print X end; X test:= test+1 X end X end X X The syntax is highly flexible, which means it might easily be X changed due to some whim. The source code should be checked X for details and changes before bugs are reported. X X The compiler is written by Per Lindberg, and placed in the X public domain. The Hacker's Ethic applies. It is based on the X VALGOL I compiler published by G.A. Edgar in Dr. Dobb's X Journal May 1985. It was implemented for the purpouse of X demonstrating how a simple compiler works. Therefore, there X are no optimizations or other frills. You might want to add X things to it; go right ahead. Happy hacking! X XFILES X fc.c Source code for the foogol compiler X fc The foogol compiler X rts.c Source code for the Run-Time system X rts.o The Run-Time system X fc.doc This file X bar.foo Your program... X XSEE ALSO X as, ld, cc X XBUGS X There are no scoping rules, all declared variables can be used X throughout the entire program. And although you can have local X declarations in blocks, these declarations are in fact global. X So you can't redeclare a variable. X X Because parsing is by simple recursive-descent and backtracking, X there is only one cheerful error message: "Syntax error". No X hints on missing or superflous semicolons or such hand-holding. X You're supposed to write correct programs in foogol, Buster! X X The output code is extremely naive, and very suitable for X code optimization exercises. X X Finally, please remember that this is just a 500-line toy X compiler, so don't expect too much of it. @//E*O*F foogol.doc// if test 4379 -ne "`wc -c <'foogol.doc'`"; then echo shar: error transmitting "'foogol.doc'" '(should have been 4379 characters)' fi fi # end of overwriting check echo shar: extracting "'foogol.c'" '(13095 characters)' if test -f 'foogol.c' ; then echo shar: will not over-write existing file "'foogol.c'" else sed 's/^X//' >foogol.c <<'@//E*O*F foogol.c//' X/*---------------------------------------------------------------------*\ X! ! X! fc.c Compiler for FOOGOL IV -- version 4.2 Last change:1985-12-02 ! X! Translates FOOGOL IV into VAX/UNIX assembler ! X! ! X! Written by Per Lindberg, QZ, Box 27322, 10254 Stockholm, Sweden. ! X! ! X! This software is in the public domain. The Hacker Ethic applies. ! X! (A postcard from anyone who ports it would be appreciated.) ! X! ! X\*---------------------------------the-mad-programmer-strikes-again----*/ X X#define UNIX X X#ifdef SARG10 /* Sargasso C (under TOPS10/20) peculiarities */ X #strings low X #define _UNIXCON X#endif X X#include X X#define isupper(c) ((c) >= 'A' && (c) <= 'Z') X#define tolower(c) ((c) - 'A' + 'a') X X#define MAXTAB 25 /* Tweak these to your own liking */ X#define MAXTOKEN 80 X X#define WHITESPACE 0 /* These could be turned into enum */ X#define NUMBER 1 X#define LETTER 2 X#define QUOTE 3 X#define SEMICOLON 4 X#define RANDOM 5 X XFILE *inf, *outf; X Xint labelcount = 0, X linecount = 0, X debug = 0; X Xchar token[MAXTOKEN], X pending[MAXTOKEN], X keytab[MAXTAB][MAXTOKEN], X symtab[MAXTAB][MAXTOKEN], X *usage = X#ifdef SARG10 X "usage: '.run fc- [-debug] infile [outfile]'"; X#endif X#ifdef UNIX X "usage: 'fc [-debug] infile [outfile]'"; X#endif X Xmain(argc,argv) int argc; char *argv[]; { X if (argc < 2) error(usage); X if (*argv[1] == '-') { debug = 1; --argc; ++argv; } X if (argc < 2) error(usage); X openinfile(argv[1]); X openoutfile(argv[argc == 2 ? 1 : 2]); X init(); X if (!PROGRAM()) error("Syntax error"); X fclose(inf); X fclose(outf); X} X Xchar *defaultext(fname,ext,force) char *fname, *ext; int force; { X static char result[255]; X char c, *point, *s = result; X strcpy(result,fname); X while (*s) ++s; X point = s; X while (c = *s, s > result && c != '.') --s; X if (c == '.') { /* some extention exists */ X point = s; X if (!force) return result; /* don't worry about what it is */ X } X strcpy(point,ext); /* put default extention after point */ X return result; X} X Xopeninfile(fname) char *fname; { X char *defaultext(); X d("openinfile",defaultext(fname,".foo",0),""); X if ((inf = fopen(defaultext(fname,".foo",0),"r")) == NULL) X error2("Can't open infile", defaultext(fname,".foo",0)); X} X Xopenoutfile(fname) char *fname; { X char *defaultext(); X d("openoutfile",defaultext(fname,".s",1),""); X if ((outf = fopen(defaultext(fname,".s",1),"w")) == NULL) X error2("Can't open outfile", defaultext(fname,".s",1)); X} X Xinit() { X int i; X d("init","",""); X get2(); X gettoken(); X for (i = 0; i < MAXTAB; i++) keytab[i][0] = '\0'; X} X Xerror(msg) char *msg; { X printf("\n\nFoo: %s", msg); X if (linecount) printf(" at line %d",linecount + 1); X printf("\n"); X exit(1); X} X Xerror2(s1,s2) char *s1,*s2; { X static char msg[80]; X sprintf(msg,"%s\"%s\"",s1,s2); X error(msg); X} X Xlowcase(s) char *s; { X char c; X for (c = *s; c = *s; ++s) if (isupper(c)) *s = tolower(c); X} X X/* Basic I/O functions */ X Xint out(line) char *line; { X char c, symb[MAXTOKEN], *subst(), *s = symb; X int printmode = 1, chmode = 1; X while(c = *line++) { X if (c == ' ') { if (chmode) putc('\t',outf); X chmode = 0; X } else { X chmode = 1; X if (c != 39) { if (printmode) putc(c,outf); X else *s++ = c; X } else if (!printmode) { X *s = '\0'; X if (*symb) fprintf(outf,"%s",subst(symb)); X printmode = 1; X } else { X printmode = 0; X s = symb; X } X } X } X putc('\n',outf); X return 1; X} X Xgettoken() { X strcpy(token,pending); get2(); X if (!strcmp("/",token) && !strcmp("*",pending)) { X d("comment:",token,pending); X while (strcmp("*",token) || strcmp("/",pending)) { X strcpy(token,pending); get2(); X d(" ",token,""); X } X strcpy(token,pending); get2(); X strcpy(token,pending); get2(); X } Xd("gettoken returning",token,pending); X} X Xget2() { X int c0, c, typ, count = 1; X char *p = pending; X while((typ=type(c0=getc(inf))) == WHITESPACE) if (c0 == '\n') ++linecount; X if (c0 != EOF) *p++ = c0; X if (typ == QUOTE) { X while ((c = getc(inf)) != EOF && type(c) != QUOTE) { X if (++count == MAXTOKEN) error("String too long"); X *p++ = c; X } X *p++ = '"'; X } X else { X while ((type(c=getc(inf)) == typ X || typ == LETTER && type(c) == NUMBER) X && typ != RANDOM X && c != EOF) { X *p++ = c; X typ = type(c); X if (++count == MAXTOKEN) error("Too long input token"); X } X ungetc(c,inf); X } X *p = '\0'; X} X Xint type(c) int c; { X if (c == EOF) return -1; X if (c >= '0' && c <= '9') return(NUMBER); X if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z') return(LETTER); X if (c == ' ' || c == '\t' || c == '\n') return(WHITESPACE); /* */ X if (c == '"') return (QUOTE); X if (c == ';') return (SEMICOLON); X return(RANDOM); X} X X/* Basic input matching functions */ X Xint match(s) char *s; { Xd("match",token,s); X lowcase(token); X if (strcmp(s,token)) return 0; X gettoken(); return 1; X} X Xint id(name) char *name; { X int t; X char c, *p = token; X d("id",token,name); X if (type(*p++) != LETTER) return 0; X while (c = *p++) { X t = type(c); X if (t != NUMBER && t != LETTER) return(0); X } X lowcase(token); X enter(name,token); X gettoken(); X return(1); X} X Xint number(name) char *name; { X char c, *p = token; X d("number",token,name); X while (c = *p++) if (type(c) != NUMBER) return(0); X enter(name,token); X gettoken(); X return(1); X} X Xint string(name) char *name; { X d("string",token,name); X if (*token != '"') return 0; X enter(name,token); X gettoken(); X return 1; X} X Xlabel(name) char *name; { X char result[6]; X d("label ",name,""); X sprintf(result,"L%d",labelcount++); X enter(name,result); X} X X/* Internal symbol table */ X Xenter(key,val) char *key, *val; { X int i; X d("enter ",val,key); X for (i = 0; i < MAXTAB; i++) { X if (keytab[i][0] == '\0') { X strcpy(keytab[i],key); X strcpy(symtab[i],val); X return; X } X } X error2("INTERNAL SYMTAB ENTER ERROR, can't enter ", val); X} X Xint lookup(key) char *key; { X int i; X for (i = MAXTAB-1; i >= 0 ; i--) { X if (!strcmp(key,keytab[i])) { X d("lookup ",symtab[i],key); X return i; X } X } X error2("INTERNAL SYMTAB LOOKUP ERROR, can't find ", key); X} X Xchar *subst(key) char *key; { X return symtab[lookup(key)]; X} X Xremove(key) char *key; { X keytab[lookup(key)][0] = '\0'; X} X X/* Syntax definition. This is the neat part! */ X Xint PROGRAM() { d("PROGRAM",token,pending); X if (!match("begin")) return 0; out(" .text # # begin"); X out(" .align 1"); X out(" .globl _main"); X out("_main:"); X out(" .word 0"); X if (!OPT_DECLARATION()) return 0; X if (!STATEMENT()) return 0; X while (match(";")) X if (!STATEMENT()) return 0; X if (!match("end")) return 0; out(" ret # # end"); X return 1; X} X Xint OPT_DECLARATION() { d("OPT_DECLARATION",token,pending); X if (DECLARATION() X && !match(";")) return 0; X return 1; X} X Xint DECLARATION() { d("DECLARATION",token,pending); X if (!match("integer")) return 0; out(" .data 1 # integer"); X if (!ID_SEQUENCE()) return 0; out(" .text"); X return 1; X} X Xint ID_SEQUENCE() { d("ID_SEQUENCE",token,pending); X if (!IDENTIFIER()) return 0; X while (match(",")) X if (!IDENTIFIER()) return 0; X return 1; X} X Xint IDENTIFIER() { d("IDENTIFIER",token,pending); X if (!id("X")) return 0; out("'X': .long 0"); X remove("X"); X return 1; X} X Xint STATEMENT() { d("STATEMENT",token,pending); X return X IO_STATEMENT() X || X WHILE_STATEMENT() X || X COND_STATEMENT() X || X BLOCK() X || /* the order is important here */ X ASSIGN_STATEMENT(); X} X Xint BLOCK() { d("BLOCK",token,pending); X if (!match("begin")) return 0; out(" # # # begin"); X if (DECL_OR_ST()) X while(match(";")) X if (!STATEMENT()) return 0; X if (!match("end")) return 0; out(" # # # end"); X return 1; X} X Xint DECL_OR_ST() { d("DECL_OR_ST",token,pending); X return X DECLARATION() X || X STATEMENT(); X} X Xint IO_STATEMENT() { d("IO_STATEMENT",token,pending); X return X PRINTS_STATEMENT() X || X PRINTN_STATEMENT() X || X PRINT_STATEMENT(); X} X Xint PRINTS_STATEMENT() { d("PRINTS_STATEMENT",token,pending); X if (!match("prints")) return 0; X if (!match("(")) return 0; X if (!string("S")) return 0; label("Ls"); X out(" .data 1 # prints"); X out("'Ls': .asciz 'S'"); X out(" .text"); X out(" pushal 'Ls'"); X out(" calls $1,_PRS"); X remove("S"); remove("Ls"); X if (!match(")")) return 0; X return 1; X} X Xint PRINTN_STATEMENT() { d("PRINTN_STATEMENT",token,pending); X if (!match("printn")) return 0; X if (!match("(")) return 0; X if (!EXPRESSION()) return 0; out(" pushl r0 # printn"); X out(" calls $1,_PRN"); X if (!match(")")) return 0; X return 1; X} X Xint PRINT_STATEMENT() { d("PRINT_STATEMENT",token,pending); X if (!match("print")) return 0; out(" calls $0,_PR # print"); X return 1; X} X Xint COND_STATEMENT() { d("COND_STATEMENT",token,pending); X if (!match("if")) return 0; label("Lt"); label("Le"); label("Lq"); X if (!EXPRESSION()) return 0; out(" tstl r0 # if"); X if (!match("then")) return 0; out(" bneq 'Lq' # then"); X out(" jmp 'Le'"); X out("'Lq':"); X if (!STATEMENT()) return 0; out(" jmp 'Lt'"); X out("'Le': # # # else"); X if (match("else")) X if (!STATEMENT()) return 0; out("'Lt': # # # endif"); X remove("Lt");remove("Le");remove("Lq"); X return 1; X} X Xint WHILE_STATEMENT() { d("WHILE_STATEMENT",token,pending); X if (!match("while")) return 0; label("Lw"); label("Lx"); label("Lv"); X out("'Lw': # # # while"); X if (!EXPRESSION()) return 0; out(" tstl r0"); X if (!match("do")) return 0; out(" bneq 'Lv'"); X out(" jmp 'Lx'"); X out("'Lv': # # # do"); X if(!STATEMENT()) return 0; out(" jmp 'Lw'"); X out("'Lx': # # # endwhile"); X remove("Lw");remove("Lx");remove("Lv"); X return 1; X} X Xint ASSIGN_STATEMENT() { d("ASSIGN_STATEMENT",token,pending); X if (!id("Var")) return 0; X if (!match(":")) return 0; X if (!match("=")) return 0; X if (!EXPRESSION()) return 0; out(" movl r0,'Var' # 'Var':="); X remove("Var"); X return 1; X} X Xint EXPRESSION() { d("EXPRESSION",token,pending); X if (!EXPR1()) return 0; X if (!OPT_RHS()) return 0; X return 1; X} X Xint OPT_RHS() { d("OPT_RHS",token,pending); X return X RHS_EQ() X || X RHS_NEQ() X || X 1; X} X Xint RHS_EQ() { d("RHS_EQ",token,pending); X if (!match("=")) return 0; label("L="); label("Ly"); X out(" pushl r0 # ="); X if (!EXPR1()) return 0; out(" cmpl (sp)+,r0"); X out(" beql 'L='"); X out(" movl $0,r0"); X out(" jmp 'Ly'"); X out("'L=': movl $1,r0"); X out("'Ly':"); X remove("L="); remove("Ly"); X return 1; X} X Xint RHS_NEQ() { d("RHS_NEQ",token,pending); X if (!match("#")) return 0; label("L#"); label("Lz"); X out(" pushl r0 # <>"); X if (!EXPR1()) return 0; out(" cmpl (sp)+,r0"); X out(" beql 'L#'"); X out(" movl $1,r0"); X out(" jmp 'Lz'"); X out("'L#': movl $0,r0"); X out("'Lz':"); X remove("L#"); remove("Lz"); X return 1; X} X Xint SIGNED_TERM() { d("SIGNED_TERM",token,pending); X return X PLUS_TERM() X || X MINUS_TERM(); X} X Xint PLUS_TERM() { d("PLUS_TERM",token,pending); X if (!match("+")) return 0; out(" pushl r0 # +term"); X if (!TERM()) return 0; out(" addl2 (sp)+,r0"); X return 1; X} X Xint MINUS_TERM() { d("MINUS_TERM",token,pending); X if (!match("-")) return 0; out(" pushl r0 # -term"); X if (!TERM()) return 0; out(" subl3 r0,(sp)+,r0"); X return 1; X} X Xint TERM() { d("TERM",token,pending); X if (!PRIMARY()) return 0; X while (match("*")) { out(" pushl r0 # *"); X if (!PRIMARY()) return 0; out(" mull2 (sp)+,r0"); X } X return 1; X} X Xint PRIMARY() { d("PRIMARY",token,pending); X if (id("Z")) { out(" movl 'Z',r0"); X remove("Z"); X return 1; X } X if (number("Z")) { out(" movl $'Z',r0"); X remove("Z"); X return 1; X } X if (match("(")) { X if (!EXPRESSION()) return 0; X if (!match(")")) return 0; X return 1; X } X return 0; X} X Xint EXPR1() { d("EXPR1",token,pending); X if (!TERM()) return 0; X while(SIGNED_TERM()); X return 1; X} X X/* And finally, the debug function... */ X Xint d(s1,s2,s3) char *s1,*s2,*s3; { X if (debug) { X printf("%s",s1); X if (*s2) printf(" \"%s\"",s2); X if (*s3) printf(" \"%s\"",s3); X putchar('\n'); X } X return 1; X} @//E*O*F foogol.c// if test 13095 -ne "`wc -c <'foogol.c'`"; then echo shar: error transmitting "'foogol.c'" '(should have been 13095 characters)' fi fi # end of overwriting check echo shar: "End of shell archive." exit 0