cfoogol: | back to bertnase | |
[ To foogol from Volume 8, Issue 88. ]
Path: maths.tcd.ie!ieunet!EU.net!uunet!sparky!not-for-mail From: cowan@snark.thyrsus.com (John Cowan) Newsgroups: comp.sources.misc Subject: v42i088: cfoogol - A compiler for a tiny ALGOL-like language, Part01/01 Followup-To: comp.sources.d Date: 8 May 1994 14:43:25 -0500 Organization: Sterling Software Lines: 714 Sender: kent@sparky.sterling.com Approved: kent@sparky.sterling.com Message-ID: <2qjfct$645@sparky.sterling.com> NNTP-Posting-Host: sparky.sterling.com X-Md4-Signature: bd3f761f9e92eb8683f8e819fbdfeb03 Submitted-by: cowan@snark.thyrsus.com (John Cowan) Posting-number: Volume 42, Issue 88 Archive-name: cfoogol/part01 Environment: C This is an upgrade of the FOOGOL compiler originally posted to mod.sources, and now available in volume 8 of comp.sources.unix. It seems to me, however, that it is better suited to comp.sources.misc. This version generates C rather than VAX assembly language output. The FOOGOL language is unchanged. Since the whole thing exists purely for hack value, I have not bothered with Makefiles, READMEs, or other such. The original poster didn't either. Read foogol.doc to know everything I do about the entire project; conversion to C was so easy that I didn't even have to fully understand foogol.c John Cowan Logical Language Group---------- #! /bin/sh # This is a shell archive. Remove anything before this line, then feed it # into a shell via "sh file" or similar. To overwrite existing files, # type "sh file -c". # Contents: foogol.c foogol.doc # Wrapped by kent@sparky on Sun May 8 14:39:05 1994 PATH=/bin:/usr/bin:/usr/ucb:/usr/local/bin:/usr/lbin:$PATH ; export PATH echo If this archive is complete, you will see the following message: echo ' "shar: End of archive 1 (of 1)."' if test -f 'foogol.c' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'foogol.c'\" else echo shar: Extracting \"'foogol.c'\" \(11483 characters\) sed "s/^X//" >'foogol.c' <<'END_OF_FILE' X/*---------------------------------------------------------------------*\ X! ! X! fc.c Compiler for FOOGOL IV -- version 5.0 Last change:1994-01-12 ! X! Translates FOOGOL IV into stupid but portable C ! X! ! X! Written by Per Lindberg, QZ, Box 27322, 10254 Stockholm, Sweden. ! X! C code generation by John Cowan ! X! ! X! This software is in the public domain. The Hacker Ethic applies. ! X! (A postcard from anyone who ports it would be appreciated.) ! X! ! X\*---------------------------------the-mad-programmer-strikes-again----*/ X X#define UNIX X X X#include X X#define isupper(c) ((c) >= 'A' && (c) <= 'Z') X#define tolower(c) ((c) - 'A' + 'a') X X#define MAXTAB 25 /* Tweak these to your own liking */ X#define MAXTOKEN 80 X X#define WHITESPACE 0 /* These could be turned into enum */ X#define NUMBER 1 X#define LETTER 2 X#define QUOTE 3 X#define SEMICOLON 4 X#define RANDOM 5 X XFILE *inf, *outf; X Xint labelcount = 0, X linecount = 0, X debug = 0; X Xchar token[MAXTOKEN], X pending[MAXTOKEN], X keytab[MAXTAB][MAXTOKEN], X symtab[MAXTAB][MAXTOKEN], X *usage = X "usage: 'fc [-debug] infile [outfile]'"; X Xmain(argc,argv) int argc; char *argv[]; { X if (argc < 2) error(usage); X if (*argv[1] == '-') { debug = 1; --argc; ++argv; } X if (argc < 2) error(usage); X openinfile(argv[1]); X openoutfile(argv[argc == 2 ? 1 : 2]); X init(); X if (!PROGRAM()) error("Syntax error"); X fclose(inf); X fclose(outf); X} X Xchar *defaultext(fname,ext,force) char *fname, *ext; int force; { X static char result[255]; X char c, *point, *s = result; X strcpy(result,fname); X while (*s) ++s; X point = s; X while (c = *s, s > result && c != '.') --s; X if (c == '.') { /* some extension exists */ X point = s; X if (!force) return result; /* don't worry about what it is */ X } X strcpy(point,ext); /* put default extension after point */ X return result; X} X Xopeninfile(fname) char *fname; { X char *defaultext(); X d("openinfile",defaultext(fname,".foo",0),""); X if ((inf = fopen(defaultext(fname,".foo",0),"r")) == NULL) X error2("Can't open infile", defaultext(fname,".foo",0)); X} X Xopenoutfile(fname) char *fname; { X char *defaultext(); X d("openoutfile",defaultext(fname,".c",1),""); X if ((outf = fopen(defaultext(fname,".c",1),"w")) == NULL) X error2("Can't open outfile", defaultext(fname,".c",1)); X} X Xinit() { X int i; X d("init","",""); X get2(); X gettoken(); X for (i = 0; i < MAXTAB; i++) keytab[i][0] = '\0'; X} X Xerror(msg) char *msg; { X printf("\n\nFoo: %s", msg); X if (linecount) printf(" at line %d",linecount + 1); X printf("\n"); X exit(1); X} X Xerror2(s1,s2) char *s1,*s2; { X static char msg[80]; X sprintf(msg,"%s\"%s\"",s1,s2); X error(msg); X} X Xlowcase(s) char *s; { X char c; X for (c = *s; c = *s; ++s) if (isupper(c)) *s = tolower(c); X} X X/* Basic I/O functions */ X Xint out(line) char *line; { X char c, symb[MAXTOKEN], *subst(), *s = symb; X int printmode = 1, chmode = 1; X while(c = *line++) { X if (c == ' ') { if (chmode) putc('\t',outf); X chmode = 0; X } else { X chmode = 1; X if (c != 39) { if (printmode) putc(c,outf); X else *s++ = c; X } else if (!printmode) { X *s = '\0'; X if (*symb) fprintf(outf,"%s",subst(symb)); X printmode = 1; X } else { X printmode = 0; X s = symb; X } X } X } X putc('\n',outf); X return 1; X} X Xgettoken() { X strcpy(token,pending); get2(); X if (!strcmp("/",token) && !strcmp("*",pending)) { X d("comment:",token,pending); X while (strcmp("*",token) || strcmp("/",pending)) { X strcpy(token,pending); get2(); X d(" ",token,""); X } X strcpy(token,pending); get2(); X strcpy(token,pending); get2(); X } Xd("gettoken returning",token,pending); X} X Xget2() { X int c0, c, typ, count = 1; X char *p = pending; X while((typ=type(c0=getc(inf))) == WHITESPACE) if (c0 == '\n') ++linecount; X if (c0 != EOF) *p++ = c0; X if (typ == QUOTE) { X while ((c = getc(inf)) != EOF && type(c) != QUOTE) { X if (++count == MAXTOKEN) error("String too long"); X *p++ = c; X } X *p++ = '"'; X } X else { X while ((type(c=getc(inf)) == typ X || typ == LETTER && type(c) == NUMBER) X && typ != RANDOM X && c != EOF) { X *p++ = c; X typ = type(c); X if (++count == MAXTOKEN) error("Too long input token"); X } X ungetc(c,inf); X } X *p = '\0'; X} X Xint type(c) int c; { X if (c == EOF) return -1; X if (c >= '0' && c <= '9') return(NUMBER); X if (c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z') return(LETTER); X if (c == ' ' || c == '\t' || c == '\n') return(WHITESPACE); /* */ X if (c == '"') return (QUOTE); X if (c == ';') return (SEMICOLON); X return(RANDOM); X} X X/* Basic input matching functions */ X Xint match(s) char *s; { Xd("match",token,s); X lowcase(token); X if (strcmp(s,token)) return 0; X gettoken(); return 1; X} X Xint id(name) char *name; { X int t; X char c, *p = token; X d("id",token,name); X if (type(*p++) != LETTER) return 0; X while (c = *p++) { X t = type(c); X if (t != NUMBER && t != LETTER) return(0); X } X lowcase(token); X enter(name,token); X gettoken(); X return(1); X} X Xint number(name) char *name; { X char c, *p = token; X d("number",token,name); X while (c = *p++) if (type(c) != NUMBER) return(0); X enter(name,token); X gettoken(); X return(1); X} X Xint string(name) char *name; { X d("string",token,name); X if (*token != '"') return 0; X enter(name,token); X gettoken(); X return 1; X} X Xlabel(name) char *name; { X char result[6]; X d("label ",name,""); X sprintf(result,"L%d",labelcount++); X enter(name,result); X} X X/* Internal symbol table */ X Xenter(key,val) char *key, *val; { X int i; X d("enter ",val,key); X for (i = 0; i < MAXTAB; i++) { X if (keytab[i][0] == '\0') { X strcpy(keytab[i],key); X strcpy(symtab[i],val); X return; X } X } X error2("INTERNAL SYMTAB ENTER ERROR, can't enter ", val); X} X Xint lookup(key) char *key; { X int i; X for (i = MAXTAB-1; i >= 0 ; i--) { X if (!strcmp(key,keytab[i])) { X d("lookup ",symtab[i],key); X return i; X } X } X error2("INTERNAL SYMTAB LOOKUP ERROR, can't find ", key); X} X Xchar *subst(key) char *key; { X return symtab[lookup(key)]; X} X XRemove(key) char *key; { X keytab[lookup(key)][0] = '\0'; X} X X/* Syntax definition. This is the neat part! */ X Xint PROGRAM() { d("PROGRAM",token,pending); X if (!match("begin")) return 0; out("#include "); X out("main() {"); X if (!OPT_DECLARATION()) return 0; X if (!STATEMENT()) return 0; X while (match(";")) X if (!STATEMENT()) return 0; X if (!match("end")) return 0; out("}"); X return 1; X} X Xint OPT_DECLARATION() { d("OPT_DECLARATION",token,pending); X if (DECLARATION() X && !match(";")) return 0; X return 1; X} X Xint DECLARATION() { d("DECLARATION",token,pending); X if (!match("integer")) return 0; out("int"); X if (!ID_SEQUENCE()) return 0; out(";"); X return 1; X} X Xint ID_SEQUENCE() { d("ID_SEQUENCE",token,pending); X if (!IDENTIFIER()) return 0; X while (match(",")) { X out(","); X if (!IDENTIFIER()) return 0; X } X return 1; X} X Xint IDENTIFIER() { d("IDENTIFIER",token,pending); X if (!id("X")) return 0; out("'X'"); X Remove("X"); X return 1; X} X Xint STATEMENT() { d("STATEMENT",token,pending); X return X IO_STATEMENT() X || X WHILE_STATEMENT() X || X COND_STATEMENT() X || X BLOCK() X || /* the order is important here */ X ASSIGN_STATEMENT(); X} X Xint BLOCK() { d("BLOCK",token,pending); X if (!match("begin")) return 0; out("{"); X if (DECL_OR_ST()) X while(match(";")) X if (!STATEMENT()) return 0; X if (!match("end")) return 0; out("}"); X return 1; X} X Xint DECL_OR_ST() { d("DECL_OR_ST",token,pending); X return X DECLARATION() X || X STATEMENT(); X} X Xint IO_STATEMENT() { d("IO_STATEMENT",token,pending); X return X PRINTS_STATEMENT() X || X PRINTN_STATEMENT() X || X PRINT_STATEMENT(); X} X Xint PRINTS_STATEMENT() { d("PRINTS_STATEMENT",token,pending); X if (!match("prints")) return 0; X if (!match("(")) return 0; X if (!string("S")) return 0; X out("printf(\"%s\", 'S');"); X Remove("S"); X if (!match(")")) return 0; X return 1; X} X Xint PRINTN_STATEMENT() { d("PRINTN_STATEMENT",token,pending); X if (!match("printn")) return 0; X if (!match("(")) return 0; out("printf(\"%d\","); X if (!EXPRESSION()) return 0; out(");"); X if (!match(")")) return 0; X return 1; X} X Xint PRINT_STATEMENT() { d("PRINT_STATEMENT",token,pending); X if (!match("print")) return 0; out("printf(\"\\n\");"); X return 1; X} X Xint COND_STATEMENT() { d("COND_STATEMENT",token,pending); X if (!match("if")) return 0; out("if ("); X if (!EXPRESSION()) return 0; out(")"); X if (!match("then")) return 0; X if (!STATEMENT()) return 0; X if (match("else")) { X out (" else"); X if (!STATEMENT()) return 0; X } X return 1; X} X Xint WHILE_STATEMENT() { d("WHILE_STATEMENT",token,pending); X if (!match("while")) return 0; X out("while("); X if (!EXPRESSION()) return 0; out(")"); X if (!match("do")) return 0; X if(!STATEMENT()) return 0; X return 1; X} X Xint ASSIGN_STATEMENT() { d("ASSIGN_STATEMENT",token,pending); X if (!id("Var")) return 0; out("'Var' ="); X if (!match(":")) return 0; X if (!match("=")) return 0; X if (!EXPRESSION()) return 0; out(";"); X Remove("Var"); X return 1; X} X Xint EXPRESSION() { d("EXPRESSION",token,pending); X if (!EXPR1()) return 0; X if (!OPT_RHS()) return 0; X return 1; X} X Xint OPT_RHS() { d("OPT_RHS",token,pending); X return X RHS_EQ() X || X RHS_NEQ() X || X 1; X} X Xint RHS_EQ() { d("RHS_EQ",token,pending); X if (!match("=")) return 0; X out("=="); X if (!EXPR1()) return 0; X return 1; X} X Xint RHS_NEQ() { d("RHS_NEQ",token,pending); X if (!match("#")) return 0; X out("!="); X if (!EXPR1()) return 0; X return 1; X} X Xint SIGNED_TERM() { d("SIGNED_TERM",token,pending); X return X PLUS_TERM() X || X MINUS_TERM(); X} X Xint PLUS_TERM() { d("PLUS_TERM",token,pending); X if (!match("+")) return 0; out("+"); X if (!TERM()) return 0; X return 1; X} X Xint MINUS_TERM() { d("MINUS_TERM",token,pending); X if (!match("-")) return 0; out("-"); X if (!TERM()) return 0; X return 1; X} X Xint TERM() { d("TERM",token,pending); X if (!PRIMARY()) return 0; X while (match("*")) { out("*"); X if (!PRIMARY()) return 0; X } X return 1; X} X Xint PRIMARY() { d("PRIMARY",token,pending); X if (id("Z")) { out("'Z'"); X Remove("Z"); X return 1; X } X if (number("Z")) { out("'Z'"); X Remove("Z"); X return 1; X } X if (match("(")) { X out("("); X if (!EXPRESSION()) return 0; X if (!match(")")) return 0; X out(")"); X return 1; X } X return 0; X} X Xint EXPR1() { d("EXPR1",token,pending); X if (!TERM()) return 0; X while(SIGNED_TERM()); X return 1; X} X X/* And finally, the debug function... */ X Xint d(s1,s2,s3) char *s1,*s2,*s3; { X if (debug) { X printf("%s",s1); X if (*s2) printf(" \"%s\"",s2); X if (*s3) printf(" \"%s\"",s3); X putchar('\n'); X } X return 1; X} END_OF_FILE if test 11483 -ne `wc -c <'foogol.c'`; then echo shar: \"'foogol.c'\" unpacked with wrong size! fi # end of 'foogol.c' fi if test -f 'foogol.doc' -a "${1}" != "-c" ; then echo shar: Will not clobber existing file \"'foogol.doc'\" else echo shar: Extracting \"'foogol.doc'\" \(4086 characters\) sed "s/^X//" >'foogol.doc' <<'END_OF_FILE' Xfc.doc Last modified: 1994-01-12 X X X The FOOGOL-IV compiler X release notes and documentation X Per Lindberg, QZ X The mad programmer strikes again! X X Version 5.0 changes by X John Cowan X XNAME X fc - foogol compiler X XSYNOPSIS X fc [ -d ] infile [ outfile ] X XDESCRIPTION X fc compiles a foogol program into ugly but portable C. X Default extensions are ".foo" for the source file and ".c" X for the compiled file. In other words, the resulting outfile X is C language, and can be assembled and linked with the X vanilla UNIX cc program. X X Options: (There is only one switch so far...) X X -d Sets the debug option, which makes the compiler print X out internal diagnostics. Useful for debugging and X understanding the compiler. X X The foogol object code has to be linked X with the C library in order to be able to do I/O. X Example: X fc foo X cc foo.c -o foo X X You can make the C code more readable with: X cb -j -s foo.c X which eliminates superfluous newlines and imposes K&R style. X X The foogol language is basically a very small ALGOL. The X current syntactic elements are: X X PROGRAM ::= begin X [ DECLARATION ; ] X STATEMENT [ ; STATEMENT ]... X end X X DECLARATION ::= integer ID_SEQUENCE X X ID_SEQUENCE ::= IDENTIFIER [ , IDENTIFIER ] X X STATEMENT ::= IO_STATEMENT X ! WHILE_STATEMENT X ! COND_STATEMENT X ! BLOCK X ! ASSIGN_STATEMENT X X BLOCK ::= begin X [ DECLARATION ] X [ ; STATEMENT ]... X end X X IO_STATEMENT ::= prints ( STRING ) X ! printn ( EXPRESSION ) X ! print X X COND_STATEMENT ::= if EXPRESSION then STATEMENT X [ else STATEMENT ] X X WHILE_STATEMENT ::= while EXPRESSION do STATEMENT X X ASSIGN_STATEMENT::= IDENTIFIER := EXPRESSION X X EXPRESSION ::= EXPR1 [ RHS ] X X RHS ::= = EXPR1 X ! # EXPR1 X X SIGNED_TERM ::= + TERM X ! - TERM X X TERM ::= PRIMARY [ * PRIMARY ]... X X PRIMARY ::= IDENTIFIER X ! NUMBER X ! ( EXPRESSION ) X X EXPR1 ::= TERM [ SIGNED_TERM ]... X X IDENTIFIER ::= X X NUMBER ::= X X STRING ::= X X Example program: X X begin X integer n, div, sub, test, testcopy, found, max; X test := 2; max := 10; /* number of primes wanted */ X while n # max do begin X div:= test-1; found:= 0; X while div-1 do begin X testcopy:= test; sub:= 0; X while testcopy do begin X sub:= sub+1; if sub = div then sub:= 0; X testcopy:= testcopy-1 X end; X if sub = 0 then found:= 1; X div:= div-1 X end; X if found = 0 then begin X n:= n+1; X printn(test); prints(" is prime number "); printn(n); print X end; X test:= test+1 X end X end X X The syntax is highly flexible, which means it might easily be X changed due to some whim. The source code should be checked X for details and changes before bugs are reported. X X The compiler is written by Per Lindberg, and placed in the X public domain. The Hacker's Ethic applies. It is based on the X VALGOL I compiler published by G.A. Edgar in Dr. Dobb's X Journal May 1985. It was implemented for the purpose of X demonstrating how a simple compiler works. Therefore, there X are no optimizations or other frills. You might want to add X things to it; go right ahead. Happy hacking! X X This version (5.0) is the first to generate C. Previous versions X generated Unix-style assembly language for the VAX. Since X VAXen are nearly defunct, C seemed a better choice of output X language. The necessary changes were very easy. X XFILES X fc.c Source code for the foogol compiler X fc The foogol compiler X fc.doc This file X bar.foo Your program... X XSEE ALSO X cc, cb X XBUGS X X Because parsing is by simple recursive-descent and backtracking, X there is only one cheerful error message: "Syntax error". No X hints on missing or superflous semicolons or such hand-holding. X You're supposed to write correct programs in foogol, Buster! X X Please remember that this is just a 500-line toy X compiler, so don't expect too much of it. END_OF_FILE if test 4086 -ne `wc -c <'foogol.doc'`; then echo shar: \"'foogol.doc'\" unpacked with wrong size! fi # end of 'foogol.doc' fi echo shar: End of archive 1 \(of 1\). cp /dev/null ark1isdone MISSING="" for I in 1 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have the archive. rm -f ark[1-9]isdone else echo You still must unpack the following archives: echo " " ${MISSING} fi exit 0 exit 0 # Just in case...