#include <stdlib.h>

#include <stdio.h>
#include <stdarg.h>

#include <ctype.h>

#include <string.h>

#include <errno.h>

#include <stdbool.h>

#include <limits.h>

#define NELEMS(arr) (sizeof(arr) / sizeof(arr[0]))

#define	da_dim(name, type) type *name = NULL; \ int qy ## name ## _p = 0; \

int qy ## name ## _max = 0
#define da_rewind(name) qy ## name ## _p = 0

#define da_redim(name) do {if (qy ## name ## p >= qy ## name ## max) \

name = realloc(name, (qy ## name ## _max += 32) * sizeof(name[0]));} while

(0)

#define da_append(name, x) do {da_redim(name); name[_qy ## name ## p++] = x;} while

(0)
#define da_len(name) qy ## name ## _p

typedef enum {

tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss,

tk_Leq,

tk_Gtr, tk_Geq, tk_Eq, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While,

tk_Print, tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma,

tk_Ident, tk_Integer, tk_String

} TokenType;

typedef struct {

TokenType tok;

int err_ln, err_col;

union {

int n; / value for constants /

char text; / text for idents */
};

} tok_s;

static FILE source_fp, dest_fp;

static int line = 1, col = 0, the_ch = ' '; da_dim(text, char);


tok_s gettok();


static void error(int err_line, int err_col, const char *fmt, ... ) { char buf[1000];

va_list ap;


va_start(ap, fmt);

vsprintf(buf, fmt, ap);

va_end(ap);


printf("(%d,%d) error: %s\n", err_line, err_col, buf); exit(1);

}


static int next_ch() { / get next char from input / the_ch = getc(source_fp);

++col;

if (the_ch == '\n') {

++line;

col = 0;

}

return the_ch;

}


static tok_s char_lit(int n, int err_line, int err_col) { / 'x' / if (the_ch == '\'')


error(err_line, err_col, "gettok: empty character constant"); if (the_ch == '\\') {

next_ch();

if (the_ch == 'n')

n = 10;

else if (the_ch == '\\')

n = '\\';

else error(err_line, err_col, "gettok: unknown escape sequence \\%c",

the_ch);

}

if (next_ch() != '\'')


error(err_line, err_col, "multi-character constant"); next_ch();

return (tok_s){tk_Integer, err_line, err_col, {n}

};

}


static tok_s div_or_cmt(int err_line, int err_col) { / process divide or comments / if (the_ch != '*')

return (tok_s){tk_Div, err_line, err_col, {0}

};


/ comment found /

next_ch();

for (;;) {

if (the_ch == '*') {

if (next_ch() == '/') {

next_ch();

return gettok();

}

}

else if (the_ch == EOF)

error(err_line, err_col, "EOF in comment");

else

next_ch();

}

}


static tok_s string_lit(int start, int err_line, int err_col) { / "st" / da_rewind(text);


while (next_ch() != start) {


if (the_ch == '\n') error(err_line, err_col, "EOL in string"); if (the_ch == EOF) error(err_line, err_col, "EOF in string"); da_append(text, (char)the_ch);

}

da_append(text, '\0');


next_ch();

return (tok_s){tk_String, err_line, err_col, {.text=text}};

}


static int kwd_cmp(const void p1, const void p2) { return strcmp(*(char *)p1, (char **)p2);

}


static TokenType get_ident_type(const char *ident) {

static struct {

char *s;

TokenType sym;


} kwds[] = { {"else", tk_Else}, {"if", tk_If}, {"print", tk_Print}, {"putc", tk_Putc}, {"while", tk_While}, }, *kwp;


return (kwp = bsearch(&ident, kwds, NELEMS(kwds), sizeof(kwds[0]), kwd_cmp)) == NULL

?	tk_Ident : kwp->sym;

}


static tok_s ident_or_int(int err_line, int err_col) { int n, is_number = true;


da_rewind(text);

while (isalnum(the_ch) || the_ch == '_') {

da_append(text, (char)the_ch);

if (!isdigit(the_ch))

is_number = false;

next_ch();

}
 

if (da_len(text) == 0)


error(err_line, err_col, "gettok: unrecognized character (%d) '%c'\n", the_ch, the_ch);

da_append(text, '\0');

if (isdigit(text[0])) {

if (!is_number)


error(err_line, err_col, "invalid number: %s\n", text); n = strtol(text, NULL, 0);

if (n == LONG_MAX && errno == ERANGE)


error(err_line, err_col, "Number exceeds maximum value"); return (tok_s){tk_Integer, err_line, err_col, {n}}; }


return (tok_s){get_ident_type(text), err_line, err_col, {.text=text}};

}


static tok_s follow(int expect, TokenType ifyes, TokenType ifno, int err_line, int

err_col) { / look ahead for '>=', etc. /

if (the_ch == expect) {

next_ch();

return (tok_s){ifyes, err_line, err_col, {0}};

}

if (ifno == tk_EOI)


error(err_line, err_col, "follow: unrecognized character '%c' (%d)\n", the_ch, the_ch);


return (tok_s){ifno, err_line, err_col, {0}}; }


tok_s gettok() { / return the token type /

/ skip white space /

while (isspace(the_ch))

next_ch();

int err_line = line;

int err_col = col;

switch (the_ch) {

case '{': next_ch(); return (tok_s){tk_Lbrace, err_line, err_col, {0}};

case '}': next_ch(); return (tok_s){tk_Rbrace, err_line, err_col, {0}};

case '(': next_ch(); return (tok_s){tk_Lparen, err_line, err_col, {0}};

case ')': next_ch(); return (tok_s){tk_Rparen, err_line, err_col, {0}};

case '+': next_ch(); return (tok_s){tk_Add, err_line, err_col, {0}};

case '-': next_ch(); return (tok_s){tk_Sub, err_line, err_col, {0}};

case '*': next_ch(); return (tok_s){tk_Mul, err_line, err_col, {0}};

case '%': next_ch(); return (tok_s){tk_Mod, err_line, err_col, {0}};

case ';': next_ch(); return (tok_s){tk_Semi, err_line, err_col, {0}};

case ',': next_ch(); return (tok_s){tk_Comma,err_line, err_col, {0}};

case '/': next_ch(); return div_or_cmt(err_line, err_col);

case '\'': next_ch(); return char_lit(the_ch, err_line, err_col);

case '<': next_ch(); return follow('=', tk_Leq, tk_Lss, err_line, err_col);

case '>': next_ch(); return follow('=', tk_Geq, tk_Gtr, err_line, err_col);

case '=': next_ch(); return follow('=', tk_Eq, tk_Assign, err_line, err_col);

case '!': next_ch(); return follow('=', tk_Neq, tk_Not, err_line, err_col);

case '&': next_ch(); return follow('&', tk_And, tk_EOI, err_line, err_col);

case '|': next_ch(); return follow('|', tk_Or, tk_EOI, err_line, err_col);

case '"' : return string_lit(the_ch, err_line, err_col);

default: return ident_or_int(err_line, err_col);

case EOF: return (tok_s){tk_EOI, err_line, err_col, {0}};

}

}

void run() { / tokenize the given input /

tok_s tok;

do {

tok = gettok();

fprintf(dest_fp, "%5d %5d %.15s",

tok.err_ln, tok.err_col,
&"End_of_input Op_multiply Op_divide Op_mod Op_add "

"Op_subtract Op_negate Op_not Op_less Op_lessequal "

"Op_greater Op_greaterequal Op_equal Op_notequal Op_assign "

"Op_and Op_or Keyword_if Keyword_else Keyword_while "

"Keyword_print Keyword_putc LeftParen RightParen LeftBrace "

"RightBrace Semicolon Comma Identifier Integer "
"String "

[tok.tok * 16]);

if (tok.tok == tk_Integer) fprintf(dest_fp, " %4d", tok.n); else if (tok.tok == tk_Ident) fprintf(dest_fp, " %s", tok.text);

else if (tok.tok == tk_String) fprintf(dest_fp, " \"%s\"", tok.text); fprintf(dest_fp, "\n");

} while (tok.tok != tk_EOI); if (dest_fp != stdout) fclose(dest_fp);

}

void init_io(FILE *fp, FILE std, const char mode[], const char fn[]) { if (fn[0] == '\0')

*fp = std;

else if ((*fp = fopen(fn, mode)) == NULL)

error(0, 0, "Can't open %s\n", fn);

}

int main(int argc, char *argv[]) {

init_io(&source_fp, stdin, "r", argc > 1 ? argv[1] : "");

init_io(&dest_fp, stdout, "wb", argc > 2 ? argv[2] : "");

run();

return 0;

}