script

script.git
git clone git://git.lenczewski.org/script.git
Log | Files | Refs

libscript.c (26558B)


      1 #include "libscript_internal.h"
      2 
      3 /* tokeniser (lexer)
      4  * ===========================================================================
      5  */
      6 
      7 static int
      8 try_tokenise_keyword(char *src, char *end, struct script_token *out)
      9 {
     10 	size_t len = end - src;
     11 
     12 	if (len == strlen("u8") && strncmp(src, "u8", len) == 0) {
     13 		out->type = SCR_TOKEN_U8;
     14 		return 0;
     15 	} else if (len == strlen("u16") && strncmp(src, "u16", len) == 0) {
     16 		out->type = SCR_TOKEN_U16;
     17 		return 0;
     18 	} else if (len == strlen("u32") && strncmp(src, "u32", len) == 0) {
     19 		out->type = SCR_TOKEN_U32;
     20 		return 0;
     21 	} else if (len == strlen("u64") && strncmp(src, "u64", len) == 0) {
     22 		out->type = SCR_TOKEN_U64;
     23 		return 0;
     24 	} else if (len == strlen("s8") && strncmp(src, "s8", len) == 0) {
     25 		out->type = SCR_TOKEN_S8;
     26 		return 0;
     27 	} else if (len == strlen("s16") && strncmp(src, "s16", len) == 0) {
     28 		out->type = SCR_TOKEN_S16;
     29 		return 0;
     30 	} else if (len == strlen("s32") && strncmp(src, "s32", len) == 0) {
     31 		out->type = SCR_TOKEN_S32;
     32 		return 0;
     33 	} else if (len == strlen("s64") && strncmp(src, "s64", len) == 0) {
     34 		out->type = SCR_TOKEN_S64;
     35 		return 0;
     36 	} else if (len == strlen("f32") && strncmp(src, "f32", len) == 0) {
     37 		out->type = SCR_TOKEN_F32;
     38 		return 0;
     39 	} else if (len == strlen("f64") && strncmp(src, "f64", len) == 0) {
     40 		out->type = SCR_TOKEN_F64;
     41 		return 0;
     42 	} else if (len == strlen("c8") && strncmp(src, "c8", len) == 0) {
     43 		out->type = SCR_TOKEN_C8;
     44 		return 0;
     45 	}
     46 
     47 	if (len == strlen("return") && strncmp(src, "return", len) == 0) {
     48 		out->type = SCR_TOKEN_RETURN;
     49 		return 0;
     50 	} else if (len == strlen("if") && strncmp(src, "if", len) == 0) {
     51 		out->type = SCR_TOKEN_IF;
     52 		return 0;
     53 	} else if (len == strlen("else") && strncmp(src, "else", len) == 0) {
     54 		out->type = SCR_TOKEN_ELSE;
     55 		return 0;
     56 	} else if (len == strlen("while") && strncmp(src, "while", len) == 0) {
     57 		out->type = SCR_TOKEN_WHILE;
     58 		return 0;
     59 	}
     60 
     61 	return -1;
     62 }
     63 
     64 static int
     65 tokenise(struct compile_ctx *ctx)
     66 {
     67 	if (ctx->verbose)
     68 		dbglog(ctx, "info: tokenising %zu bytes of source\n", ctx->len);
     69 
     70 	char *src = ctx->src, *end = ctx->src + ctx->len;
     71 
     72 	struct script_token *token;
     73 	char *buf = src, *buf_end = src;
     74 
     75 	while (src < end) {
     76 		char lookahead[] = {
     77 			src[0],
     78 			(src + 1 < end) ? src[1] : '\0',
     79 			(src + 2 < end) ? src[2] : '\0',
     80 		};
     81 
     82 		if (isspace(lookahead[0]))
     83 			goto next_char;
     84 
     85 		switch (lookahead[0]) {
     86 		/* these single char sequences map directly to a unique token */
     87 		case SCR_TOKEN_LPAREN:	case SCR_TOKEN_RPAREN:
     88 		case SCR_TOKEN_LBRACK:	case SCR_TOKEN_RBRACK:
     89 		case SCR_TOKEN_LBRACE:	case SCR_TOKEN_RBRACE:
     90 		case SCR_TOKEN_LANGLE:	case SCR_TOKEN_RANGLE:
     91 		case SCR_TOKEN_LSLASH:	case SCR_TOKEN_RSLASH:
     92 		case SCR_TOKEN_SEMICOLON:
     93 		case SCR_TOKEN_DOT:	case SCR_TOKEN_COMMA:
     94 		case SCR_TOKEN_PLUS:	case SCR_TOKEN_MINUS:
     95 		case SCR_TOKEN_STAR:
     96 			token = token_stream_alloc(&ctx->stream);
     97 			assert(token);
     98 
     99 			token->type = (enum script_token_type) lookahead[0];
    100 			goto next_char;
    101 
    102 		/* TODO: multi-char sequences map to unique tokens */
    103 		case SCR_TOKEN_COLON:
    104 			token = token_stream_alloc(&ctx->stream);
    105 			assert(token);
    106 
    107 			if (lookahead[1] == SCR_TOKEN_COLON) {
    108 				token->type = SCR_TOKEN_DOUBLE_COLON;
    109 				src++;
    110 			} else {
    111 				token->type = SCR_TOKEN_COLON;
    112 			}
    113 
    114 			goto next_char;
    115 
    116 		case SCR_TOKEN_EQUALS:
    117 			token = token_stream_alloc(&ctx->stream);
    118 			assert(token);
    119 
    120 			if (lookahead[1] == SCR_TOKEN_EQUALS) {
    121 				token->type = SCR_TOKEN_DOUBLE_EQUALS;
    122 				src++;
    123 			} else {
    124 				token->type = SCR_TOKEN_EQUALS;
    125 			}
    126 
    127 			goto next_char;
    128 
    129 		/* TODO: a string literal */
    130 		/* TODO: a character literal */
    131 
    132 		/* an integer literal or float literal (TODO) */
    133 		case '0': case '1': case '2': case '3': case '4':
    134 		case '5': case '6': case '7': case '8': case '9':
    135 			buf = buf_end = src;
    136 
    137 			uint64_t value = strtoull(buf, &buf_end, 0);
    138 			if (ctx->verbose)
    139 				dbglog(ctx, "info: have integer literal: '%.*s'\n",
    140 					    (int) (buf_end - buf), buf);
    141 
    142 			if (errno == EINVAL) {
    143 				dbglog(ctx, "error: integer literal is invalid: '%.*s'\n",
    144 					    (int) (buf_end - buf), buf);
    145 				goto error;
    146 			} else if (errno == ERANGE) {
    147 				dbglog(ctx, "warn: integer literal is out of range, truncating: "
    148 					    "'%.*s'\n", (int) (buf_end - buf), buf);
    149 			}
    150 
    151 			token = token_stream_alloc(&ctx->stream);
    152 			assert(token);
    153 
    154 			token->type = SCR_TOKEN_LITERAL_INT;
    155 			token->literal_int = value;
    156 
    157 			src = buf_end;
    158 			break;
    159 
    160 		/* anything else is a fragment of a ident or keyword */
    161 		default:
    162 			if (!isalnum(lookahead[0]) && lookahead[0] != '_') {
    163 				dbglog(ctx, "error: unexpected character: %c\n",
    164 					    lookahead[0]);
    165 				goto error;
    166 			}
    167 
    168 			buf = buf_end = src;
    169 			while (isalnum(*buf_end) || *buf_end == '_')
    170 				buf_end++;
    171 
    172 			if (ctx->verbose)
    173 				dbglog(ctx, "info: have ident or keyword: '%.*s'\n",
    174 					    (int) (buf_end - buf), buf);
    175 
    176 			token = token_stream_alloc(&ctx->stream);
    177 			assert(token);
    178 
    179 			if (try_tokenise_keyword(buf, buf_end, token) < 0) {
    180 				token->type = SCR_TOKEN_IDENT;
    181 				token->ident = ident_pool_intern(&ctx->ident_pool,
    182 								 buf, buf_end - buf);
    183 			}
    184 
    185 			src = buf_end;
    186 
    187 			break;
    188 		}
    189 
    190 		continue;
    191 
    192 next_char:
    193 		src++;
    194 	}
    195 
    196 	return 0;
    197 
    198 error:
    199 	return -1;
    200 }
    201 
    202 /* parser
    203  * ===========================================================================
    204  */
    205 
    206 static inline struct script_token
    207 peek(struct compile_ctx *ctx, size_t off)
    208 {
    209 	if (ctx->stream.cur + off >= ctx->stream.len)
    210 		return (struct script_token) { .type = SCR_TOKEN_EOF, };
    211 
    212 	struct script_token tok = ctx->stream.ptr[ctx->stream.cur + off];
    213 	if (ctx->verbose)
    214 		dbglog(ctx, "info: peek(%zu/%zu) = '%s'\n", ctx->stream.cur + off,
    215 			    ctx->stream.len, script_token_type_str(tok.type));
    216 
    217 	return tok;
    218 }
    219 
    220 static inline struct script_token
    221 next(struct compile_ctx *ctx)
    222 {
    223 	if (ctx->stream.cur > ctx->stream.len)
    224 		return (struct script_token) { .type = SCR_TOKEN_EOF, };
    225 
    226 	return ctx->stream.ptr[ctx->stream.cur++];
    227 }
    228 
    229 static inline struct script_token
    230 expect(struct compile_ctx *ctx, enum script_token_type expected)
    231 {
    232 	struct script_token tok = next(ctx);
    233 
    234 	if (ctx->verbose)
    235 		dbglog(ctx, "info: expect(%zu/%zu, '%s') = '%s'\n", ctx->stream.cur,
    236 			    ctx->stream.len, script_token_type_str(expected),
    237 			    script_token_type_str(tok.type));
    238 
    239 	if (tok.type != expected) {
    240 		char buf[64];
    241 		int len = dump_token(&tok, buf, sizeof buf);
    242 		dbglog(ctx, "error: expected %s, got: %.*s\n",
    243 			    script_token_type_str(expected), len, buf);
    244 		PANIC();
    245 	}
    246 
    247 	return tok;
    248 }
    249 
    250 static inline int
    251 is_primitive_typeinfo(struct script_typeinfo *typeinfo)
    252 {
    253 	(void) typeinfo;
    254 
    255 	return 1;
    256 }
    257 
    258 static struct script_typeinfo *
    259 primitive_typeinfo(enum script_type type)
    260 {
    261 	static struct script_typeinfo typeinfos[] = {
    262 		[SCR_TYPE_U8]	= { .type = SCR_TYPE_U8,  .size = 1, .alignment = 1, },
    263 		[SCR_TYPE_U16]	= { .type = SCR_TYPE_U16, .size = 2, .alignment = 2, },
    264 		[SCR_TYPE_U32]	= { .type = SCR_TYPE_U32, .size = 4, .alignment = 4, },
    265 		[SCR_TYPE_U64]	= { .type = SCR_TYPE_U64, .size = 8, .alignment = 8, },
    266 		[SCR_TYPE_S8]	= { .type = SCR_TYPE_S8,  .size = 1, .alignment = 1, },
    267 		[SCR_TYPE_S16]	= { .type = SCR_TYPE_S16, .size = 2, .alignment = 2, },
    268 		[SCR_TYPE_S32]	= { .type = SCR_TYPE_S32, .size = 4, .alignment = 4, },
    269 		[SCR_TYPE_S64]	= { .type = SCR_TYPE_S64, .size = 8, .alignment = 8, },
    270 	};
    271 
    272 	return &typeinfos[type];
    273 }
    274 
    275 static struct script_typeinfo *
    276 literal_typeinfo(struct compile_ctx *ctx, enum script_token_type type)
    277 {
    278 	switch (type) {
    279 	case SCR_TOKEN_LITERAL_INT: // TODO: better rules surrounding literal types
    280 		return primitive_typeinfo(SCR_TYPE_U64);
    281 
    282 	default:
    283 		dbglog(ctx, "error: invalid token type has no type info: %s\n",
    284 			    script_token_type_str(type));
    285 		return NULL;
    286 	}
    287 }
    288 
    289 static struct script_typeinfo *
    290 parse_typeinfo(struct compile_ctx *ctx)
    291 {
    292 	struct script_token tok = next(ctx);
    293 
    294 	switch (tok.type) {
    295 	case SCR_TOKEN_U64:
    296 		return primitive_typeinfo(SCR_TYPE_U64);
    297 
    298 	default: {
    299 		char buf[64];
    300 		int len = dump_token(&tok, buf, sizeof buf);
    301 		dbglog(ctx, "error: expected typeinfo, got: %.*s\n", len, buf);
    302 		return NULL;
    303 	} break;
    304 	}
    305 }
    306 
    307 static struct script_expr *
    308 parse_expr(struct compile_ctx *ctx, struct script_typeinfo *expected_typeinfo)
    309 {
    310 	(void) expected_typeinfo; /* TODO: use this hint to upcast types if needed */
    311 
    312 	/* special-case assignment until we have a proper pratt parser for expressions */
    313 	if (peek(ctx, 0).type == SCR_TOKEN_IDENT && peek(ctx, 1).type == SCR_TOKEN_EQUALS) {
    314 		struct script_expr *expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
    315 		assert(expr);
    316 
    317 		expr->type = SCR_EXPR_ASSIGNMENT;
    318 
    319 		struct script_flystr ident = expect(ctx, SCR_TOKEN_IDENT).ident;
    320 		struct script_symbol *sym = symbol_table_find(&ctx->symtab, ident);
    321 		assert(sym);
    322 
    323 		expect(ctx, SCR_TOKEN_EQUALS);
    324 
    325 		expr->assignment.ident = sym->ident;
    326 		expr->assignment.rhs = parse_expr(ctx, sym->variable.typeinfo);
    327 
    328 		return expr;
    329 	}
    330 
    331 	struct script_expr *stack[128];
    332 	size_t i = 0;
    333 
    334 	while (i < sizeof stack) {
    335 		struct script_token tok = peek(ctx, 0);
    336 
    337 		struct script_expr *expr;
    338 		switch (tok.type) {
    339 		case SCR_TOKEN_IDENT:
    340 			expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
    341 			assert(expr);
    342 
    343 			tok = next(ctx);
    344 
    345 			expr->type = SCR_EXPR_IDENT;
    346 			expr->ident = tok.ident;
    347 
    348 			struct script_symbol *sym = symbol_table_find(&ctx->symtab, tok.ident);
    349 			assert(sym);
    350 
    351 			assert(sym->type == SCR_SYMBOL_VARIABLE);
    352 			expr->typeinfo = sym->variable.typeinfo;
    353 
    354 			break;
    355 
    356 		case SCR_TOKEN_LITERAL_INT:
    357 			expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
    358 			assert(expr);
    359 
    360 			tok = next(ctx);
    361 
    362 			expr->type = SCR_EXPR_LITERAL_INT;
    363 			expr->literal_int = tok.literal_int;
    364 			expr->typeinfo = literal_typeinfo(ctx, tok.type);
    365 
    366 			break;
    367 
    368 		case SCR_TOKEN_PLUS:
    369 		case SCR_TOKEN_MINUS:
    370 		case SCR_TOKEN_STAR:
    371 		case SCR_TOKEN_RSLASH:
    372 		case SCR_TOKEN_DOUBLE_EQUALS:
    373 			expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
    374 			assert(expr);
    375 
    376 			expr->type = SCR_EXPR_BINARY_OP;
    377 			switch (next(ctx).type) {
    378 			case SCR_TOKEN_PLUS:	expr->binary_op.type = SCR_BINARY_OP_ADD; break;
    379 			case SCR_TOKEN_MINUS:	expr->binary_op.type = SCR_BINARY_OP_SUB; break;
    380 			case SCR_TOKEN_STAR:	expr->binary_op.type = SCR_BINARY_OP_MUL; break;
    381 			case SCR_TOKEN_RSLASH:	expr->binary_op.type = SCR_BINARY_OP_DIV; break;
    382 
    383 			case SCR_TOKEN_DOUBLE_EQUALS:
    384 						expr->binary_op.type = SCR_BINARY_OP_EQU; break;
    385 
    386 			default:		UNREACHABLE(); break;
    387 			}
    388 
    389 			assert(i >= 2);
    390 			expr->binary_op.rhs = stack[--i];
    391 			expr->binary_op.lhs = stack[--i];
    392 
    393 			assert(expr->binary_op.lhs->typeinfo == expr->binary_op.rhs->typeinfo);
    394 			expr->typeinfo = expr->binary_op.lhs->typeinfo;
    395 
    396 			break;
    397 
    398 		default:
    399 			goto end;
    400 		}
    401 
    402 		assert(i < sizeof stack);
    403 		stack[i++] = expr;
    404 	}
    405 
    406 	if (i == sizeof stack) {
    407 		struct script_token tok = peek(ctx, 0);
    408 
    409 		char buf[64];
    410 		int len = dump_token(&tok, buf, sizeof buf);
    411 		dbglog(ctx, "error: stack overflow while parsing expression: '%.*s'\n",
    412 			    len, buf);
    413 
    414 		return NULL;
    415 	}
    416 
    417 	assert(i == 0);
    418 
    419 end:
    420 	return stack[0];
    421 }
    422 
    423 static struct script_stmt *
    424 parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope);
    425 
    426 static struct script_stmt *
    427 parse_block(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    428 {
    429 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    430 	assert(stmt);
    431 
    432 	stmt->type = SCR_STMT_BLOCK;
    433 	stmt->block.children.head = stmt->block.children.tail = NULL;
    434 
    435 	expect(ctx, SCR_TOKEN_LBRACE);
    436 
    437 	while (peek(ctx, 0).type != SCR_TOKEN_RBRACE) {
    438 		struct script_stmt *child = parse_statement(ctx, parent_scope);
    439 		list_push_tail(&stmt->block.children, &child->list_node);
    440 	}
    441 
    442 	expect(ctx, SCR_TOKEN_RBRACE);
    443 
    444 	return stmt;
    445 }
    446 
    447 static struct script_stmt *
    448 parse_declstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    449 {
    450 	struct script_symbol *sym = symbol_table_push(&ctx->symtab);
    451 	assert(sym);
    452 
    453 	sym->type = SCR_SYMBOL_VARIABLE;
    454 	sym->parent_scope = parent_scope;
    455 	sym->list_node.prev = sym->list_node.next = NULL;
    456 
    457 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    458 	assert(stmt);
    459 
    460 	stmt->type = SCR_STMT_DECL;
    461 	sym->ident = stmt->decl.ident = expect(ctx, SCR_TOKEN_IDENT).ident;
    462 
    463 	expect(ctx, SCR_TOKEN_COLON);
    464 	sym->variable.typeinfo = stmt->decl.typeinfo = parse_typeinfo(ctx);
    465 	sym->variable.addr = symbol_table_next_addr(&ctx->symtab, sym->variable.typeinfo);
    466 
    467 	expect(ctx, SCR_TOKEN_EQUALS);
    468 	stmt->decl.expr = parse_expr(ctx, sym->variable.typeinfo);
    469 
    470 	assert(stmt->decl.typeinfo == stmt->decl.expr->typeinfo);
    471 
    472 	expect(ctx, SCR_TOKEN_SEMICOLON);
    473 
    474 	return stmt;
    475 }
    476 
    477 static struct script_stmt *
    478 parse_exprstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    479 {
    480 	(void) parent_scope;
    481 
    482 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    483 	assert(stmt);
    484 
    485 	stmt->type = SCR_STMT_EXPR;
    486 	stmt->expr = parse_expr(ctx, NULL);
    487 
    488 	expect(ctx, SCR_TOKEN_SEMICOLON);
    489 
    490 	return stmt;
    491 }
    492 
    493 static struct script_stmt *
    494 parse_if_else(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    495 {
    496 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    497 	assert(stmt);
    498 
    499 	stmt->type = SCR_STMT_IF_ELSE;
    500 
    501 	expect(ctx, SCR_TOKEN_IF);
    502 
    503 	expect(ctx, SCR_TOKEN_LPAREN);
    504 	stmt->if_else.cond = parse_expr(ctx, NULL);
    505 	assert(stmt->if_else.cond);
    506 
    507 	expect(ctx, SCR_TOKEN_RPAREN);
    508 
    509 	stmt->if_else.if_body = parse_statement(ctx, parent_scope);
    510 	assert(stmt->if_else.if_body);
    511 
    512 	if (peek(ctx, 0).type == SCR_TOKEN_ELSE) {
    513 		next(ctx);
    514 		stmt->if_else.else_body = parse_statement(ctx, parent_scope);
    515 		assert(stmt->if_else.else_body);
    516 	}
    517 
    518 	return stmt;
    519 }
    520 
    521 static struct script_stmt *
    522 parse_while_loop(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    523 {
    524 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    525 	assert(stmt);
    526 
    527 	stmt->type = SCR_STMT_WHILE_LOOP;
    528 
    529 	expect(ctx, SCR_TOKEN_WHILE);
    530 
    531 	expect(ctx, SCR_TOKEN_LPAREN);
    532 	stmt->while_loop.cond = parse_expr(ctx, NULL);
    533 	expect(ctx, SCR_TOKEN_RPAREN);
    534 
    535 	stmt->while_loop.while_body = parse_statement(ctx, parent_scope);
    536 
    537 	return stmt;
    538 }
    539 
    540 static struct script_stmt *
    541 parse_return(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    542 {
    543 	(void) parent_scope;
    544 
    545 	struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
    546 	assert(stmt);
    547 
    548 	stmt->type = SCR_STMT_RET;
    549 	expect(ctx, SCR_TOKEN_RETURN);
    550 	stmt->ret.expr = parse_expr(ctx, NULL);
    551 
    552 	/* TODO: validate that return type is the same as parent scope function type? */
    553 
    554 	expect(ctx, SCR_TOKEN_SEMICOLON);
    555 
    556 	return stmt;
    557 }
    558 
    559 static struct script_stmt *
    560 parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope)
    561 {
    562 	struct script_token tok = peek(ctx, 0);
    563 	switch (tok.type) {
    564 	case SCR_TOKEN_LBRACE:
    565 		return parse_block(ctx, parent_scope);
    566 
    567 	case SCR_TOKEN_IDENT:
    568 		if (peek(ctx, 1).type == SCR_TOKEN_COLON)
    569 			return parse_declstmt(ctx, parent_scope);
    570 		else
    571 			return parse_exprstmt(ctx, parent_scope);
    572 
    573 	case SCR_TOKEN_IF:
    574 		return parse_if_else(ctx, parent_scope);
    575 
    576 	case SCR_TOKEN_WHILE:
    577 		return parse_while_loop(ctx, parent_scope);
    578 
    579 	case SCR_TOKEN_RETURN:
    580 		return parse_return(ctx, parent_scope);
    581 
    582 	default: {
    583 		char buf[64];
    584 		int len = dump_token(&tok, buf, sizeof buf);
    585 		dbglog(ctx, "error: expected a statement, got: '%.*s'\n",
    586 			    len, buf);
    587 		return NULL;
    588 	} break;
    589 	}
    590 }
    591 
    592 static inline struct script_ir_typeinfo *
    593 primitive_ir_typeinfo(enum script_ir_type type)
    594 {
    595 	static struct script_ir_typeinfo typeinfos[] = {
    596 		[SCR_IR_TYPE_U8]  = { .type = SCR_IR_TYPE_U8,  .size = 1, .alignment = 1, },
    597 		[SCR_IR_TYPE_U16] = { .type = SCR_IR_TYPE_U16, .size = 2, .alignment = 2, },
    598 		[SCR_IR_TYPE_U32] = { .type = SCR_IR_TYPE_U32, .size = 4, .alignment = 4, },
    599 		[SCR_IR_TYPE_U64] = { .type = SCR_IR_TYPE_U64, .size = 8, .alignment = 8, },
    600 		[SCR_IR_TYPE_S8]  = { .type = SCR_IR_TYPE_S8,  .size = 1, .alignment = 1, },
    601 		[SCR_IR_TYPE_S16] = { .type = SCR_IR_TYPE_S16, .size = 2, .alignment = 2, },
    602 		[SCR_IR_TYPE_S32] = { .type = SCR_IR_TYPE_S32, .size = 4, .alignment = 4, },
    603 		[SCR_IR_TYPE_S64] = { .type = SCR_IR_TYPE_S64, .size = 8, .alignment = 8, },
    604 		[SCR_IR_TYPE_F32] = { .type = SCR_IR_TYPE_F32, .size = 4, .alignment = 4, },
    605 		[SCR_IR_TYPE_F64] = { .type = SCR_IR_TYPE_F64, .size = 8, .alignment = 8, },
    606 		[SCR_IR_TYPE_C8]  = { .type = SCR_IR_TYPE_C8,  .size = 1, .alignment = 1, },
    607 		[SCR_IR_TYPE_PTR] = { .type = SCR_IR_TYPE_PTR, .size = 8, .alignment = 8, },
    608 	};
    609 
    610 	return &typeinfos[type];
    611 }
    612 
    613 static inline struct script_ir_typeinfo *
    614 typeinfo_to_ir_typeinfo(struct script_typeinfo *typeinfo)
    615 {
    616 	switch (typeinfo->type) {
    617 	case SCR_TYPE_U8:	return primitive_ir_typeinfo(SCR_IR_TYPE_U8);
    618 	case SCR_TYPE_U16:	return primitive_ir_typeinfo(SCR_IR_TYPE_U16);
    619 	case SCR_TYPE_U32:	return primitive_ir_typeinfo(SCR_IR_TYPE_U32);
    620 	case SCR_TYPE_U64:	return primitive_ir_typeinfo(SCR_IR_TYPE_U64);
    621 
    622 	case SCR_TYPE_S8:	return primitive_ir_typeinfo(SCR_IR_TYPE_S8);
    623 	case SCR_TYPE_S16:	return primitive_ir_typeinfo(SCR_IR_TYPE_S16);
    624 	case SCR_TYPE_S32:	return primitive_ir_typeinfo(SCR_IR_TYPE_S32);
    625 	case SCR_TYPE_S64:	return primitive_ir_typeinfo(SCR_IR_TYPE_S64);
    626 
    627 	case SCR_TYPE_F32:	return primitive_ir_typeinfo(SCR_IR_TYPE_F32);
    628 	case SCR_TYPE_F64:	return primitive_ir_typeinfo(SCR_IR_TYPE_F64);
    629 
    630 	case SCR_TYPE_C8:	return primitive_ir_typeinfo(SCR_IR_TYPE_C8);
    631 
    632 	// TODO: handle pointer types?
    633 	}
    634 }
    635 
    636 static int
    637 emit_expr(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_expr *expr)
    638 {
    639 	struct script_symbol *sym;
    640 	switch (expr->type) {
    641 	case SCR_EXPR_IDENT:
    642 		sym = symbol_table_find(&ctx->symtab, expr->ident);
    643 		assert(sym);
    644 		assert(sym->type == SCR_SYMBOL_VARIABLE);
    645 
    646 		scratch->opcode = SCR_IR_LOAD;
    647 		scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo);
    648 		scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
    649 		scratch->operands[0].address = sym->variable.addr;
    650 		scratch->operand_count = 1;
    651 		break;
    652 
    653 	case SCR_EXPR_LITERAL_INT:
    654 		scratch->opcode = SCR_IR_PUSH;
    655 		scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo);
    656 		scratch->operands[0].type = SCR_IR_OPERAND_LITERAL;
    657 		scratch->operands[0].literal = expr->literal_int;
    658 		scratch->operand_count = 1;
    659 		break;
    660 
    661 	case SCR_EXPR_ASSIGNMENT:
    662 		sym = symbol_table_find(&ctx->symtab, expr->assignment.ident);
    663 		assert(sym);
    664 		assert(sym->type == SCR_SYMBOL_VARIABLE);
    665 
    666 		if (emit_expr(ctx, scratch, expr->assignment.rhs) < 0)
    667 			return -1;
    668 
    669 		scratch->opcode = SCR_IR_STORE;
    670 		scratch->typeinfo = typeinfo_to_ir_typeinfo(sym->variable.typeinfo);
    671 		scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
    672 		scratch->operands[0].address = sym->variable.addr;
    673 		scratch->operand_count = 1;
    674 		break;
    675 
    676 	case SCR_EXPR_BINARY_OP:
    677 		if (emit_expr(ctx, scratch, expr->binary_op.lhs) < 0)
    678 			return -1;
    679 
    680 		if (emit_expr(ctx, scratch, expr->binary_op.rhs) < 0)
    681 			return -1;
    682 
    683 		switch (expr->binary_op.type) {
    684 		case SCR_BINARY_OP_ADD: scratch->opcode = SCR_IR_ADD; break;
    685 		case SCR_BINARY_OP_SUB: scratch->opcode = SCR_IR_SUB; break;
    686 		case SCR_BINARY_OP_MUL: scratch->opcode = SCR_IR_MUL; break;
    687 		case SCR_BINARY_OP_DIV: scratch->opcode = SCR_IR_DIV; break;
    688 		case SCR_BINARY_OP_EQU: scratch->opcode = SCR_IR_CMP; break;
    689 		}
    690 
    691 		scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo);
    692 		scratch->operand_count = 0;
    693 		break;
    694 	}
    695 
    696 	if (!ir_push(&ctx->ir, scratch))
    697 		return -1;
    698 
    699 	return 0;
    700 }
    701 
    702 static int
    703 emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt);
    704 
    705 static int
    706 emit_block(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    707 {
    708 	SCRIPT_LIST_ITER(&stmt->block.children) {
    709 		struct script_stmt *child = SCRIPT_FROM_NODE(it, struct script_stmt, list_node);
    710 
    711 		if (emit(ctx, scratch, child) < 0)
    712 			return -1;
    713 	}
    714 
    715 	return 0;
    716 }
    717 
    718 static int
    719 emit_declstmt(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    720 {
    721 	assert(stmt->type == SCR_STMT_DECL);
    722 	assert(is_primitive_typeinfo(stmt->decl.typeinfo));
    723 
    724 	struct script_symbol *sym = symbol_table_find(&ctx->symtab, stmt->decl.ident);
    725 	assert(sym);
    726 	assert(sym->type == SCR_SYMBOL_VARIABLE);
    727 
    728 	if (emit_expr(ctx, scratch, stmt->decl.expr) < 0)
    729 		return -1;
    730 
    731 	scratch->opcode = SCR_IR_STORE;
    732 	scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->decl.typeinfo);
    733 	scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
    734 	scratch->operands[0].address = sym->variable.addr;
    735 	scratch->operand_count = 1;
    736 
    737 	if (!ir_push(&ctx->ir, scratch))
    738 		return -1;
    739 
    740 	return 0;
    741 
    742 #if 0 /* TODO: more advanced rules for emitting variables */
    743 	if (sym->parent) { /* this is a function-local variable, enable full expressions */
    744 
    745 	} else { /* this is a global variable, can only have compiletime expressions */
    746 
    747 	}
    748 #endif
    749 }
    750 
    751 static int
    752 emit_if_else(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    753 {
    754 	assert(stmt->type == SCR_STMT_IF_ELSE);
    755 
    756 	if (emit_expr(ctx, scratch, stmt->if_else.cond) < 0)
    757 		return -1;
    758 
    759 	scratch->opcode = SCR_IR_JNE;
    760 	scratch->typeinfo = NULL;
    761 	scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
    762 	scratch->operands[0].offset = 0;
    763 	scratch->operand_count = 1;
    764 
    765 	// TODO: patch this with if_body length
    766 	struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch);
    767 	if (!cond_failed_jump)
    768 		return -1;
    769 
    770 	if (emit(ctx, scratch, stmt->if_else.if_body) < 0)
    771 		return -1;
    772 
    773 	cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir)
    774 					      - cond_failed_jump;
    775 	if (!stmt->if_else.else_body)
    776 		goto end;
    777 
    778 	scratch->opcode = SCR_IR_JMP;
    779 	scratch->typeinfo = NULL;
    780 	scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
    781 	scratch->operands[0].offset = 0;
    782 	scratch->operand_count = 1;
    783 
    784 	struct script_ir_inst *else_block_skip = ir_push(&ctx->ir, scratch);
    785 	if (!else_block_skip)
    786 		return -1;
    787 
    788 	// if we have an else block, we must take into account the else-block-skip jump
    789 	cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir)
    790 					      - cond_failed_jump;
    791 
    792 	if (emit(ctx, scratch, stmt->if_else.else_body) < 0)
    793 		return -1;
    794 
    795 	else_block_skip->operands[0].literal = ir_current_inst(&ctx->ir)
    796 					     - else_block_skip;
    797 
    798 end:
    799 	return 0;
    800 }
    801 
    802 static int
    803 emit_while_loop(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    804 {
    805 	assert(stmt->type == SCR_STMT_WHILE_LOOP);
    806 
    807 	struct script_ir_inst const *top_of_loop = ir_current_inst(&ctx->ir);
    808 
    809 	if (emit_expr(ctx, scratch, stmt->while_loop.cond) < 0)
    810 		return -1;
    811 
    812 	scratch->opcode = SCR_IR_JEQ;
    813 	scratch->typeinfo = NULL;
    814 	scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
    815 	scratch->operands[0].offset = 0;
    816 	scratch->operand_count = 1;
    817 
    818 	struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch);
    819 	if (!cond_failed_jump)
    820 		return -1;
    821 
    822 	if (emit(ctx, scratch, stmt->while_loop.while_body) < 0)
    823 		return -1;
    824 
    825 	scratch->opcode = SCR_IR_JMP;
    826 	scratch->typeinfo = NULL;
    827 	scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
    828 	scratch->operands[0].offset = top_of_loop - ir_current_inst(&ctx->ir);
    829 	scratch->operand_count = 1;
    830 
    831 	struct script_ir_inst *jump_to_top = ir_push(&ctx->ir, scratch);
    832 	if (!jump_to_top)
    833 		return -1;
    834 
    835 	cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir) - cond_failed_jump;
    836 
    837 	return 0;
    838 }
    839 
    840 static int
    841 emit_return(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    842 {
    843 	assert(stmt->type == SCR_STMT_RET);
    844 	assert(is_primitive_typeinfo(stmt->ret.expr->typeinfo));
    845 
    846 	if (emit_expr(ctx, scratch, stmt->ret.expr) < 0)
    847 		return -1;
    848 
    849 	scratch->opcode = SCR_IR_RET;
    850 	scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->ret.expr->typeinfo);
    851 	scratch->operand_count = 0;
    852 
    853 	if (!ir_push(&ctx->ir, scratch))
    854 		return -1;
    855 
    856 	return 0;
    857 }
    858 
    859 static int
    860 emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
    861 {
    862 	switch (stmt->type) {
    863 	case SCR_STMT_BLOCK:		return emit_block(ctx, scratch, stmt);
    864 	case SCR_STMT_DECL:		return emit_declstmt(ctx, scratch, stmt);
    865 	case SCR_STMT_EXPR:		return emit_expr(ctx, scratch, stmt->expr);
    866 	case SCR_STMT_IF_ELSE:		return emit_if_else(ctx, scratch, stmt);
    867 	case SCR_STMT_WHILE_LOOP:	return emit_while_loop(ctx, scratch, stmt);
    868 	case SCR_STMT_RET:		return emit_return(ctx, scratch, stmt);
    869 	}
    870 
    871 	return -1;
    872 }
    873 
    874 static int
    875 parse(struct compile_ctx *ctx)
    876 {
    877 	ctx->ast.roots.head = ctx->ast.roots.tail = NULL;
    878 
    879 	struct script_ir_inst inst;
    880 	while (peek(ctx, 0).type != SCR_TOKEN_EOF) {
    881 		struct script_stmt *stmt = parse_statement(ctx, NULL);
    882 		list_push_tail(&ctx->ast.roots, &stmt->list_node);
    883 
    884 		if (emit(ctx, &inst, stmt) < 0) {
    885 			dbglog(ctx, "error: failed to emit statement\n");
    886 			return -1;
    887 		}
    888 	}
    889 
    890 	assert(ctx->stream.cur == ctx->stream.len);
    891 
    892 	return 0;
    893 }
    894 
    895 /* libscript
    896  * ===========================================================================
    897  */
    898 
    899 int
    900 script_compile(char *src, size_t src_len, void *mem, size_t mem_len,
    901 	       struct script_program *out, FILE *errstream, int verbose)
    902 {
    903 	assert(mem_len > sizeof(struct compile_ctx));
    904 
    905 	struct compile_ctx *ctx = mem;
    906 	ctx->arena.ptr = (char *) mem + sizeof *ctx;
    907 	ctx->arena.cap = mem_len - sizeof *ctx;
    908 	ctx->arena.len = 0;
    909 
    910 	ctx->scratch.cap = SCRIPT_COMPILE_SCRATCH_BYTES;
    911 	ctx->scratch.ptr = ALLOC_ARRAY(&ctx->arena, char, ctx->scratch.cap);
    912 	ctx->scratch.len = 0;
    913 
    914 	ctx->errstream = errstream;
    915 	ctx->verbose = verbose;
    916 	ctx->src = src;
    917 	ctx->len = src_len;
    918 
    919 	ctx->ident_pool.cap = SCRIPT_COMPILE_MAX_IDENTS;
    920 	ctx->ident_pool.ptr = ALLOC_ARRAY(&ctx->arena, struct identifier, ctx->ident_pool.cap);
    921 	ctx->ident_pool.len = 0;
    922 	assert(ctx->ident_pool.ptr);
    923 
    924 	ctx->stream.cap = SCRIPT_COMPILE_MAX_TOKS;
    925 	ctx->stream.ptr = ALLOC_ARRAY(&ctx->arena, struct script_token, ctx->stream.cap);
    926 	ctx->stream.len = ctx->stream.cur = 0;
    927 	assert(ctx->stream.ptr);
    928 
    929 	ctx->symtab.cap = SCRIPT_COMPILE_MAX_SYMS;
    930 	ctx->symtab.ptr = ALLOC_ARRAY(&ctx->arena, struct script_symbol, ctx->symtab.cap);
    931 	ctx->symtab.len = 0;
    932 	assert(ctx->symtab.ptr);
    933 
    934 	ctx->ir.cap = SCRIPT_COMPILE_MAX_IR_INSTRS;
    935 	ctx->ir.ptr = ALLOC_ARRAY(&ctx->arena, struct script_ir_inst, ctx->ir.cap);
    936 	ctx->ir.len = 0;
    937 	assert(ctx->ir.ptr);
    938 
    939 	if (ctx->verbose) {
    940 		dbglog(ctx, "info: arena cap: %zu bytes, scratch cap: %zu bytes, verbose: %d\n",
    941 			     ctx->arena.cap, ctx->scratch.cap, verbose);
    942 	}
    943 
    944 	int res;
    945 	if ((res = tokenise(ctx)) < 0) {
    946 		dbglog(ctx, "error: failed to tokenise source\n");
    947 		return -1;
    948 	}
    949 
    950 	if (ctx->verbose)
    951 		dump_token_stream(ctx);
    952 
    953 	if ((res = parse(ctx)) < 0) {
    954 		dbglog(ctx, "error: failed to parse source\n");
    955 		return -1;
    956 	}
    957 
    958 	if (ctx->verbose) {
    959 		dump_symbol_table(ctx);
    960 		dbglog(ctx, "\n");
    961 		dump_ast(ctx);
    962 		dbglog(ctx, "\n");
    963 		dump_ir(ctx);
    964 	}
    965 
    966 	out->instructions.ptr = ctx->ir.ptr;
    967 	out->instructions.len = ctx->ir.len;
    968 	out->max_heap_bytes = ctx->symtab.address;
    969 
    970 	return 0;
    971 }
    972 
    973 #include "debug.c"