/* libscript.c (26558B) */
1 #include "libscript_internal.h" 2 3 /* tokeniser (lexer) 4 * =========================================================================== 5 */ 6 7 static int 8 try_tokenise_keyword(char *src, char *end, struct script_token *out) 9 { 10 size_t len = end - src; 11 12 if (len == strlen("u8") && strncmp(src, "u8", len) == 0) { 13 out->type = SCR_TOKEN_U8; 14 return 0; 15 } else if (len == strlen("u16") && strncmp(src, "u16", len) == 0) { 16 out->type = SCR_TOKEN_U16; 17 return 0; 18 } else if (len == strlen("u32") && strncmp(src, "u32", len) == 0) { 19 out->type = SCR_TOKEN_U32; 20 return 0; 21 } else if (len == strlen("u64") && strncmp(src, "u64", len) == 0) { 22 out->type = SCR_TOKEN_U64; 23 return 0; 24 } else if (len == strlen("s8") && strncmp(src, "s8", len) == 0) { 25 out->type = SCR_TOKEN_S8; 26 return 0; 27 } else if (len == strlen("s16") && strncmp(src, "s16", len) == 0) { 28 out->type = SCR_TOKEN_S16; 29 return 0; 30 } else if (len == strlen("s32") && strncmp(src, "s32", len) == 0) { 31 out->type = SCR_TOKEN_S32; 32 return 0; 33 } else if (len == strlen("s64") && strncmp(src, "s64", len) == 0) { 34 out->type = SCR_TOKEN_S64; 35 return 0; 36 } else if (len == strlen("f32") && strncmp(src, "f32", len) == 0) { 37 out->type = SCR_TOKEN_F32; 38 return 0; 39 } else if (len == strlen("f64") && strncmp(src, "f64", len) == 0) { 40 out->type = SCR_TOKEN_F64; 41 return 0; 42 } else if (len == strlen("c8") && strncmp(src, "c8", len) == 0) { 43 out->type = SCR_TOKEN_C8; 44 return 0; 45 } 46 47 if (len == strlen("return") && strncmp(src, "return", len) == 0) { 48 out->type = SCR_TOKEN_RETURN; 49 return 0; 50 } else if (len == strlen("if") && strncmp(src, "if", len) == 0) { 51 out->type = SCR_TOKEN_IF; 52 return 0; 53 } else if (len == strlen("else") && strncmp(src, "else", len) == 0) { 54 out->type = SCR_TOKEN_ELSE; 55 return 0; 56 } else if (len == strlen("while") && strncmp(src, "while", len) == 0) { 57 out->type = SCR_TOKEN_WHILE; 58 return 0; 59 } 60 61 return -1; 62 
} 63 64 static int 65 tokenise(struct compile_ctx *ctx) 66 { 67 if (ctx->verbose) 68 dbglog(ctx, "info: tokenising %zu bytes of source\n", ctx->len); 69 70 char *src = ctx->src, *end = ctx->src + ctx->len; 71 72 struct script_token *token; 73 char *buf = src, *buf_end = src; 74 75 while (src < end) { 76 char lookahead[] = { 77 src[0], 78 (src + 1 < end) ? src[1] : '\0', 79 (src + 2 < end) ? src[2] : '\0', 80 }; 81 82 if (isspace(lookahead[0])) 83 goto next_char; 84 85 switch (lookahead[0]) { 86 /* these single char sequences map directly to a unique token */ 87 case SCR_TOKEN_LPAREN: case SCR_TOKEN_RPAREN: 88 case SCR_TOKEN_LBRACK: case SCR_TOKEN_RBRACK: 89 case SCR_TOKEN_LBRACE: case SCR_TOKEN_RBRACE: 90 case SCR_TOKEN_LANGLE: case SCR_TOKEN_RANGLE: 91 case SCR_TOKEN_LSLASH: case SCR_TOKEN_RSLASH: 92 case SCR_TOKEN_SEMICOLON: 93 case SCR_TOKEN_DOT: case SCR_TOKEN_COMMA: 94 case SCR_TOKEN_PLUS: case SCR_TOKEN_MINUS: 95 case SCR_TOKEN_STAR: 96 token = token_stream_alloc(&ctx->stream); 97 assert(token); 98 99 token->type = (enum script_token_type) lookahead[0]; 100 goto next_char; 101 102 /* TODO: multi-char sequences map to unique tokens */ 103 case SCR_TOKEN_COLON: 104 token = token_stream_alloc(&ctx->stream); 105 assert(token); 106 107 if (lookahead[1] == SCR_TOKEN_COLON) { 108 token->type = SCR_TOKEN_DOUBLE_COLON; 109 src++; 110 } else { 111 token->type = SCR_TOKEN_COLON; 112 } 113 114 goto next_char; 115 116 case SCR_TOKEN_EQUALS: 117 token = token_stream_alloc(&ctx->stream); 118 assert(token); 119 120 if (lookahead[1] == SCR_TOKEN_EQUALS) { 121 token->type = SCR_TOKEN_DOUBLE_EQUALS; 122 src++; 123 } else { 124 token->type = SCR_TOKEN_EQUALS; 125 } 126 127 goto next_char; 128 129 /* TODO: a string literal */ 130 /* TODO: a character literal */ 131 132 /* an integer literal or float literal (TODO) */ 133 case '0': case '1': case '2': case '3': case '4': 134 case '5': case '6': case '7': case '8': case '9': 135 buf = buf_end = src; 136 137 uint64_t value = 
strtoull(buf, &buf_end, 0); 138 if (ctx->verbose) 139 dbglog(ctx, "info: have integer literal: '%.*s'\n", 140 (int) (buf_end - buf), buf); 141 142 if (errno == EINVAL) { 143 dbglog(ctx, "error: integer literal is invalid: '%.*s'\n", 144 (int) (buf_end - buf), buf); 145 goto error; 146 } else if (errno == ERANGE) { 147 dbglog(ctx, "warn: integer literal is out of range, truncating: " 148 "'%.*s'\n", (int) (buf_end - buf), buf); 149 } 150 151 token = token_stream_alloc(&ctx->stream); 152 assert(token); 153 154 token->type = SCR_TOKEN_LITERAL_INT; 155 token->literal_int = value; 156 157 src = buf_end; 158 break; 159 160 /* anything else is a fragment of a ident or keyword */ 161 default: 162 if (!isalnum(lookahead[0]) && lookahead[0] != '_') { 163 dbglog(ctx, "error: unexpected character: %c\n", 164 lookahead[0]); 165 goto error; 166 } 167 168 buf = buf_end = src; 169 while (isalnum(*buf_end) || *buf_end == '_') 170 buf_end++; 171 172 if (ctx->verbose) 173 dbglog(ctx, "info: have ident or keyword: '%.*s'\n", 174 (int) (buf_end - buf), buf); 175 176 token = token_stream_alloc(&ctx->stream); 177 assert(token); 178 179 if (try_tokenise_keyword(buf, buf_end, token) < 0) { 180 token->type = SCR_TOKEN_IDENT; 181 token->ident = ident_pool_intern(&ctx->ident_pool, 182 buf, buf_end - buf); 183 } 184 185 src = buf_end; 186 187 break; 188 } 189 190 continue; 191 192 next_char: 193 src++; 194 } 195 196 return 0; 197 198 error: 199 return -1; 200 } 201 202 /* parser 203 * =========================================================================== 204 */ 205 206 static inline struct script_token 207 peek(struct compile_ctx *ctx, size_t off) 208 { 209 if (ctx->stream.cur + off >= ctx->stream.len) 210 return (struct script_token) { .type = SCR_TOKEN_EOF, }; 211 212 struct script_token tok = ctx->stream.ptr[ctx->stream.cur + off]; 213 if (ctx->verbose) 214 dbglog(ctx, "info: peek(%zu/%zu) = '%s'\n", ctx->stream.cur + off, 215 ctx->stream.len, script_token_type_str(tok.type)); 216 217 
return tok; 218 } 219 220 static inline struct script_token 221 next(struct compile_ctx *ctx) 222 { 223 if (ctx->stream.cur > ctx->stream.len) 224 return (struct script_token) { .type = SCR_TOKEN_EOF, }; 225 226 return ctx->stream.ptr[ctx->stream.cur++]; 227 } 228 229 static inline struct script_token 230 expect(struct compile_ctx *ctx, enum script_token_type expected) 231 { 232 struct script_token tok = next(ctx); 233 234 if (ctx->verbose) 235 dbglog(ctx, "info: expect(%zu/%zu, '%s') = '%s'\n", ctx->stream.cur, 236 ctx->stream.len, script_token_type_str(expected), 237 script_token_type_str(tok.type)); 238 239 if (tok.type != expected) { 240 char buf[64]; 241 int len = dump_token(&tok, buf, sizeof buf); 242 dbglog(ctx, "error: expected %s, got: %.*s\n", 243 script_token_type_str(expected), len, buf); 244 PANIC(); 245 } 246 247 return tok; 248 } 249 250 static inline int 251 is_primitive_typeinfo(struct script_typeinfo *typeinfo) 252 { 253 (void) typeinfo; 254 255 return 1; 256 } 257 258 static struct script_typeinfo * 259 primitive_typeinfo(enum script_type type) 260 { 261 static struct script_typeinfo typeinfos[] = { 262 [SCR_TYPE_U8] = { .type = SCR_TYPE_U8, .size = 1, .alignment = 1, }, 263 [SCR_TYPE_U16] = { .type = SCR_TYPE_U16, .size = 2, .alignment = 2, }, 264 [SCR_TYPE_U32] = { .type = SCR_TYPE_U32, .size = 4, .alignment = 4, }, 265 [SCR_TYPE_U64] = { .type = SCR_TYPE_U64, .size = 8, .alignment = 8, }, 266 [SCR_TYPE_S8] = { .type = SCR_TYPE_S8, .size = 1, .alignment = 1, }, 267 [SCR_TYPE_S16] = { .type = SCR_TYPE_S16, .size = 2, .alignment = 2, }, 268 [SCR_TYPE_S32] = { .type = SCR_TYPE_S32, .size = 4, .alignment = 4, }, 269 [SCR_TYPE_S64] = { .type = SCR_TYPE_S64, .size = 8, .alignment = 8, }, 270 }; 271 272 return &typeinfos[type]; 273 } 274 275 static struct script_typeinfo * 276 literal_typeinfo(struct compile_ctx *ctx, enum script_token_type type) 277 { 278 switch (type) { 279 case SCR_TOKEN_LITERAL_INT: // TODO: better rules surrounding literal types 
280 return primitive_typeinfo(SCR_TYPE_U64); 281 282 default: 283 dbglog(ctx, "error: invalid token type has no type info: %s\n", 284 script_token_type_str(type)); 285 return NULL; 286 } 287 } 288 289 static struct script_typeinfo * 290 parse_typeinfo(struct compile_ctx *ctx) 291 { 292 struct script_token tok = next(ctx); 293 294 switch (tok.type) { 295 case SCR_TOKEN_U64: 296 return primitive_typeinfo(SCR_TYPE_U64); 297 298 default: { 299 char buf[64]; 300 int len = dump_token(&tok, buf, sizeof buf); 301 dbglog(ctx, "error: expected typeinfo, got: %.*s\n", len, buf); 302 return NULL; 303 } break; 304 } 305 } 306 307 static struct script_expr * 308 parse_expr(struct compile_ctx *ctx, struct script_typeinfo *expected_typeinfo) 309 { 310 (void) expected_typeinfo; /* TODO: use this hint to upcast types if needed */ 311 312 /* special-case assignment until we have a proper pratt parser for expressions */ 313 if (peek(ctx, 0).type == SCR_TOKEN_IDENT && peek(ctx, 1).type == SCR_TOKEN_EQUALS) { 314 struct script_expr *expr = ALLOC_SIZED(&ctx->arena, struct script_expr); 315 assert(expr); 316 317 expr->type = SCR_EXPR_ASSIGNMENT; 318 319 struct script_flystr ident = expect(ctx, SCR_TOKEN_IDENT).ident; 320 struct script_symbol *sym = symbol_table_find(&ctx->symtab, ident); 321 assert(sym); 322 323 expect(ctx, SCR_TOKEN_EQUALS); 324 325 expr->assignment.ident = sym->ident; 326 expr->assignment.rhs = parse_expr(ctx, sym->variable.typeinfo); 327 328 return expr; 329 } 330 331 struct script_expr *stack[128]; 332 size_t i = 0; 333 334 while (i < sizeof stack) { 335 struct script_token tok = peek(ctx, 0); 336 337 struct script_expr *expr; 338 switch (tok.type) { 339 case SCR_TOKEN_IDENT: 340 expr = ALLOC_SIZED(&ctx->arena, struct script_expr); 341 assert(expr); 342 343 tok = next(ctx); 344 345 expr->type = SCR_EXPR_IDENT; 346 expr->ident = tok.ident; 347 348 struct script_symbol *sym = symbol_table_find(&ctx->symtab, tok.ident); 349 assert(sym); 350 351 assert(sym->type == 
SCR_SYMBOL_VARIABLE); 352 expr->typeinfo = sym->variable.typeinfo; 353 354 break; 355 356 case SCR_TOKEN_LITERAL_INT: 357 expr = ALLOC_SIZED(&ctx->arena, struct script_expr); 358 assert(expr); 359 360 tok = next(ctx); 361 362 expr->type = SCR_EXPR_LITERAL_INT; 363 expr->literal_int = tok.literal_int; 364 expr->typeinfo = literal_typeinfo(ctx, tok.type); 365 366 break; 367 368 case SCR_TOKEN_PLUS: 369 case SCR_TOKEN_MINUS: 370 case SCR_TOKEN_STAR: 371 case SCR_TOKEN_RSLASH: 372 case SCR_TOKEN_DOUBLE_EQUALS: 373 expr = ALLOC_SIZED(&ctx->arena, struct script_expr); 374 assert(expr); 375 376 expr->type = SCR_EXPR_BINARY_OP; 377 switch (next(ctx).type) { 378 case SCR_TOKEN_PLUS: expr->binary_op.type = SCR_BINARY_OP_ADD; break; 379 case SCR_TOKEN_MINUS: expr->binary_op.type = SCR_BINARY_OP_SUB; break; 380 case SCR_TOKEN_STAR: expr->binary_op.type = SCR_BINARY_OP_MUL; break; 381 case SCR_TOKEN_RSLASH: expr->binary_op.type = SCR_BINARY_OP_DIV; break; 382 383 case SCR_TOKEN_DOUBLE_EQUALS: 384 expr->binary_op.type = SCR_BINARY_OP_EQU; break; 385 386 default: UNREACHABLE(); break; 387 } 388 389 assert(i >= 2); 390 expr->binary_op.rhs = stack[--i]; 391 expr->binary_op.lhs = stack[--i]; 392 393 assert(expr->binary_op.lhs->typeinfo == expr->binary_op.rhs->typeinfo); 394 expr->typeinfo = expr->binary_op.lhs->typeinfo; 395 396 break; 397 398 default: 399 goto end; 400 } 401 402 assert(i < sizeof stack); 403 stack[i++] = expr; 404 } 405 406 if (i == sizeof stack) { 407 struct script_token tok = peek(ctx, 0); 408 409 char buf[64]; 410 int len = dump_token(&tok, buf, sizeof buf); 411 dbglog(ctx, "error: stack overflow while parsing expression: '%.*s'\n", 412 len, buf); 413 414 return NULL; 415 } 416 417 assert(i == 0); 418 419 end: 420 return stack[0]; 421 } 422 423 static struct script_stmt * 424 parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope); 425 426 static struct script_stmt * 427 parse_block(struct compile_ctx *ctx, struct script_symbol 
*parent_scope) 428 { 429 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 430 assert(stmt); 431 432 stmt->type = SCR_STMT_BLOCK; 433 stmt->block.children.head = stmt->block.children.tail = NULL; 434 435 expect(ctx, SCR_TOKEN_LBRACE); 436 437 while (peek(ctx, 0).type != SCR_TOKEN_RBRACE) { 438 struct script_stmt *child = parse_statement(ctx, parent_scope); 439 list_push_tail(&stmt->block.children, &child->list_node); 440 } 441 442 expect(ctx, SCR_TOKEN_RBRACE); 443 444 return stmt; 445 } 446 447 static struct script_stmt * 448 parse_declstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope) 449 { 450 struct script_symbol *sym = symbol_table_push(&ctx->symtab); 451 assert(sym); 452 453 sym->type = SCR_SYMBOL_VARIABLE; 454 sym->parent_scope = parent_scope; 455 sym->list_node.prev = sym->list_node.next = NULL; 456 457 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 458 assert(stmt); 459 460 stmt->type = SCR_STMT_DECL; 461 sym->ident = stmt->decl.ident = expect(ctx, SCR_TOKEN_IDENT).ident; 462 463 expect(ctx, SCR_TOKEN_COLON); 464 sym->variable.typeinfo = stmt->decl.typeinfo = parse_typeinfo(ctx); 465 sym->variable.addr = symbol_table_next_addr(&ctx->symtab, sym->variable.typeinfo); 466 467 expect(ctx, SCR_TOKEN_EQUALS); 468 stmt->decl.expr = parse_expr(ctx, sym->variable.typeinfo); 469 470 assert(stmt->decl.typeinfo == stmt->decl.expr->typeinfo); 471 472 expect(ctx, SCR_TOKEN_SEMICOLON); 473 474 return stmt; 475 } 476 477 static struct script_stmt * 478 parse_exprstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope) 479 { 480 (void) parent_scope; 481 482 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 483 assert(stmt); 484 485 stmt->type = SCR_STMT_EXPR; 486 stmt->expr = parse_expr(ctx, NULL); 487 488 expect(ctx, SCR_TOKEN_SEMICOLON); 489 490 return stmt; 491 } 492 493 static struct script_stmt * 494 parse_if_else(struct compile_ctx *ctx, struct script_symbol *parent_scope) 
495 { 496 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 497 assert(stmt); 498 499 stmt->type = SCR_STMT_IF_ELSE; 500 501 expect(ctx, SCR_TOKEN_IF); 502 503 expect(ctx, SCR_TOKEN_LPAREN); 504 stmt->if_else.cond = parse_expr(ctx, NULL); 505 assert(stmt->if_else.cond); 506 507 expect(ctx, SCR_TOKEN_RPAREN); 508 509 stmt->if_else.if_body = parse_statement(ctx, parent_scope); 510 assert(stmt->if_else.if_body); 511 512 if (peek(ctx, 0).type == SCR_TOKEN_ELSE) { 513 next(ctx); 514 stmt->if_else.else_body = parse_statement(ctx, parent_scope); 515 assert(stmt->if_else.else_body); 516 } 517 518 return stmt; 519 } 520 521 static struct script_stmt * 522 parse_while_loop(struct compile_ctx *ctx, struct script_symbol *parent_scope) 523 { 524 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 525 assert(stmt); 526 527 stmt->type = SCR_STMT_WHILE_LOOP; 528 529 expect(ctx, SCR_TOKEN_WHILE); 530 531 expect(ctx, SCR_TOKEN_LPAREN); 532 stmt->while_loop.cond = parse_expr(ctx, NULL); 533 expect(ctx, SCR_TOKEN_RPAREN); 534 535 stmt->while_loop.while_body = parse_statement(ctx, parent_scope); 536 537 return stmt; 538 } 539 540 static struct script_stmt * 541 parse_return(struct compile_ctx *ctx, struct script_symbol *parent_scope) 542 { 543 (void) parent_scope; 544 545 struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt); 546 assert(stmt); 547 548 stmt->type = SCR_STMT_RET; 549 expect(ctx, SCR_TOKEN_RETURN); 550 stmt->ret.expr = parse_expr(ctx, NULL); 551 552 /* TODO: validate that return type is the same as parent scope function type? 
*/ 553 554 expect(ctx, SCR_TOKEN_SEMICOLON); 555 556 return stmt; 557 } 558 559 static struct script_stmt * 560 parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope) 561 { 562 struct script_token tok = peek(ctx, 0); 563 switch (tok.type) { 564 case SCR_TOKEN_LBRACE: 565 return parse_block(ctx, parent_scope); 566 567 case SCR_TOKEN_IDENT: 568 if (peek(ctx, 1).type == SCR_TOKEN_COLON) 569 return parse_declstmt(ctx, parent_scope); 570 else 571 return parse_exprstmt(ctx, parent_scope); 572 573 case SCR_TOKEN_IF: 574 return parse_if_else(ctx, parent_scope); 575 576 case SCR_TOKEN_WHILE: 577 return parse_while_loop(ctx, parent_scope); 578 579 case SCR_TOKEN_RETURN: 580 return parse_return(ctx, parent_scope); 581 582 default: { 583 char buf[64]; 584 int len = dump_token(&tok, buf, sizeof buf); 585 dbglog(ctx, "error: expected a statement, got: '%.*s'\n", 586 len, buf); 587 return NULL; 588 } break; 589 } 590 } 591 592 static inline struct script_ir_typeinfo * 593 primitive_ir_typeinfo(enum script_ir_type type) 594 { 595 static struct script_ir_typeinfo typeinfos[] = { 596 [SCR_IR_TYPE_U8] = { .type = SCR_IR_TYPE_U8, .size = 1, .alignment = 1, }, 597 [SCR_IR_TYPE_U16] = { .type = SCR_IR_TYPE_U16, .size = 2, .alignment = 2, }, 598 [SCR_IR_TYPE_U32] = { .type = SCR_IR_TYPE_U32, .size = 4, .alignment = 4, }, 599 [SCR_IR_TYPE_U64] = { .type = SCR_IR_TYPE_U64, .size = 8, .alignment = 8, }, 600 [SCR_IR_TYPE_S8] = { .type = SCR_IR_TYPE_S8, .size = 1, .alignment = 1, }, 601 [SCR_IR_TYPE_S16] = { .type = SCR_IR_TYPE_S16, .size = 2, .alignment = 2, }, 602 [SCR_IR_TYPE_S32] = { .type = SCR_IR_TYPE_S32, .size = 4, .alignment = 4, }, 603 [SCR_IR_TYPE_S64] = { .type = SCR_IR_TYPE_S64, .size = 8, .alignment = 8, }, 604 [SCR_IR_TYPE_F32] = { .type = SCR_IR_TYPE_F32, .size = 4, .alignment = 4, }, 605 [SCR_IR_TYPE_F64] = { .type = SCR_IR_TYPE_F64, .size = 8, .alignment = 8, }, 606 [SCR_IR_TYPE_C8] = { .type = SCR_IR_TYPE_C8, .size = 1, .alignment = 1, }, 607 
[SCR_IR_TYPE_PTR] = { .type = SCR_IR_TYPE_PTR, .size = 8, .alignment = 8, }, 608 }; 609 610 return &typeinfos[type]; 611 } 612 613 static inline struct script_ir_typeinfo * 614 typeinfo_to_ir_typeinfo(struct script_typeinfo *typeinfo) 615 { 616 switch (typeinfo->type) { 617 case SCR_TYPE_U8: return primitive_ir_typeinfo(SCR_IR_TYPE_U8); 618 case SCR_TYPE_U16: return primitive_ir_typeinfo(SCR_IR_TYPE_U16); 619 case SCR_TYPE_U32: return primitive_ir_typeinfo(SCR_IR_TYPE_U32); 620 case SCR_TYPE_U64: return primitive_ir_typeinfo(SCR_IR_TYPE_U64); 621 622 case SCR_TYPE_S8: return primitive_ir_typeinfo(SCR_IR_TYPE_S8); 623 case SCR_TYPE_S16: return primitive_ir_typeinfo(SCR_IR_TYPE_S16); 624 case SCR_TYPE_S32: return primitive_ir_typeinfo(SCR_IR_TYPE_S32); 625 case SCR_TYPE_S64: return primitive_ir_typeinfo(SCR_IR_TYPE_S64); 626 627 case SCR_TYPE_F32: return primitive_ir_typeinfo(SCR_IR_TYPE_F32); 628 case SCR_TYPE_F64: return primitive_ir_typeinfo(SCR_IR_TYPE_F64); 629 630 case SCR_TYPE_C8: return primitive_ir_typeinfo(SCR_IR_TYPE_C8); 631 632 // TODO: handle pointer types? 
633 } 634 } 635 636 static int 637 emit_expr(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_expr *expr) 638 { 639 struct script_symbol *sym; 640 switch (expr->type) { 641 case SCR_EXPR_IDENT: 642 sym = symbol_table_find(&ctx->symtab, expr->ident); 643 assert(sym); 644 assert(sym->type == SCR_SYMBOL_VARIABLE); 645 646 scratch->opcode = SCR_IR_LOAD; 647 scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo); 648 scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS; 649 scratch->operands[0].address = sym->variable.addr; 650 scratch->operand_count = 1; 651 break; 652 653 case SCR_EXPR_LITERAL_INT: 654 scratch->opcode = SCR_IR_PUSH; 655 scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo); 656 scratch->operands[0].type = SCR_IR_OPERAND_LITERAL; 657 scratch->operands[0].literal = expr->literal_int; 658 scratch->operand_count = 1; 659 break; 660 661 case SCR_EXPR_ASSIGNMENT: 662 sym = symbol_table_find(&ctx->symtab, expr->assignment.ident); 663 assert(sym); 664 assert(sym->type == SCR_SYMBOL_VARIABLE); 665 666 if (emit_expr(ctx, scratch, expr->assignment.rhs) < 0) 667 return -1; 668 669 scratch->opcode = SCR_IR_STORE; 670 scratch->typeinfo = typeinfo_to_ir_typeinfo(sym->variable.typeinfo); 671 scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS; 672 scratch->operands[0].address = sym->variable.addr; 673 scratch->operand_count = 1; 674 break; 675 676 case SCR_EXPR_BINARY_OP: 677 if (emit_expr(ctx, scratch, expr->binary_op.lhs) < 0) 678 return -1; 679 680 if (emit_expr(ctx, scratch, expr->binary_op.rhs) < 0) 681 return -1; 682 683 switch (expr->binary_op.type) { 684 case SCR_BINARY_OP_ADD: scratch->opcode = SCR_IR_ADD; break; 685 case SCR_BINARY_OP_SUB: scratch->opcode = SCR_IR_SUB; break; 686 case SCR_BINARY_OP_MUL: scratch->opcode = SCR_IR_MUL; break; 687 case SCR_BINARY_OP_DIV: scratch->opcode = SCR_IR_DIV; break; 688 case SCR_BINARY_OP_EQU: scratch->opcode = SCR_IR_CMP; break; 689 } 690 691 scratch->typeinfo = 
typeinfo_to_ir_typeinfo(expr->typeinfo); 692 scratch->operand_count = 0; 693 break; 694 } 695 696 if (!ir_push(&ctx->ir, scratch)) 697 return -1; 698 699 return 0; 700 } 701 702 static int 703 emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt); 704 705 static int 706 emit_block(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 707 { 708 SCRIPT_LIST_ITER(&stmt->block.children) { 709 struct script_stmt *child = SCRIPT_FROM_NODE(it, struct script_stmt, list_node); 710 711 if (emit(ctx, scratch, child) < 0) 712 return -1; 713 } 714 715 return 0; 716 } 717 718 static int 719 emit_declstmt(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 720 { 721 assert(stmt->type == SCR_STMT_DECL); 722 assert(is_primitive_typeinfo(stmt->decl.typeinfo)); 723 724 struct script_symbol *sym = symbol_table_find(&ctx->symtab, stmt->decl.ident); 725 assert(sym); 726 assert(sym->type == SCR_SYMBOL_VARIABLE); 727 728 if (emit_expr(ctx, scratch, stmt->decl.expr) < 0) 729 return -1; 730 731 scratch->opcode = SCR_IR_STORE; 732 scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->decl.typeinfo); 733 scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS; 734 scratch->operands[0].address = sym->variable.addr; 735 scratch->operand_count = 1; 736 737 if (!ir_push(&ctx->ir, scratch)) 738 return -1; 739 740 return 0; 741 742 #if 0 /* TODO: more advanced rules for emitting variables */ 743 if (sym->parent) { /* this is a function-local variable, enable full expressions */ 744 745 } else { /* this is a global variable, can only have compiletime expressions */ 746 747 } 748 #endif 749 } 750 751 static int 752 emit_if_else(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 753 { 754 assert(stmt->type == SCR_STMT_IF_ELSE); 755 756 if (emit_expr(ctx, scratch, stmt->if_else.cond) < 0) 757 return -1; 758 759 scratch->opcode = SCR_IR_JNE; 760 scratch->typeinfo = NULL; 761 
scratch->operands[0].type = SCR_IR_OPERAND_OFFSET; 762 scratch->operands[0].offset = 0; 763 scratch->operand_count = 1; 764 765 // TODO: patch this with if_body length 766 struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch); 767 if (!cond_failed_jump) 768 return -1; 769 770 if (emit(ctx, scratch, stmt->if_else.if_body) < 0) 771 return -1; 772 773 cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir) 774 - cond_failed_jump; 775 if (!stmt->if_else.else_body) 776 goto end; 777 778 scratch->opcode = SCR_IR_JMP; 779 scratch->typeinfo = NULL; 780 scratch->operands[0].type = SCR_IR_OPERAND_OFFSET; 781 scratch->operands[0].offset = 0; 782 scratch->operand_count = 1; 783 784 struct script_ir_inst *else_block_skip = ir_push(&ctx->ir, scratch); 785 if (!else_block_skip) 786 return -1; 787 788 // if we have an else block, we must take into account the else-block-skip jump 789 cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir) 790 - cond_failed_jump; 791 792 if (emit(ctx, scratch, stmt->if_else.else_body) < 0) 793 return -1; 794 795 else_block_skip->operands[0].literal = ir_current_inst(&ctx->ir) 796 - else_block_skip; 797 798 end: 799 return 0; 800 } 801 802 static int 803 emit_while_loop(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 804 { 805 assert(stmt->type == SCR_STMT_WHILE_LOOP); 806 807 struct script_ir_inst const *top_of_loop = ir_current_inst(&ctx->ir); 808 809 if (emit_expr(ctx, scratch, stmt->while_loop.cond) < 0) 810 return -1; 811 812 scratch->opcode = SCR_IR_JEQ; 813 scratch->typeinfo = NULL; 814 scratch->operands[0].type = SCR_IR_OPERAND_OFFSET; 815 scratch->operands[0].offset = 0; 816 scratch->operand_count = 1; 817 818 struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch); 819 if (!cond_failed_jump) 820 return -1; 821 822 if (emit(ctx, scratch, stmt->while_loop.while_body) < 0) 823 return -1; 824 825 scratch->opcode = SCR_IR_JMP; 826 scratch->typeinfo = NULL; 827 
scratch->operands[0].type = SCR_IR_OPERAND_OFFSET; 828 scratch->operands[0].offset = top_of_loop - ir_current_inst(&ctx->ir); 829 scratch->operand_count = 1; 830 831 struct script_ir_inst *jump_to_top = ir_push(&ctx->ir, scratch); 832 if (!jump_to_top) 833 return -1; 834 835 cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir) - cond_failed_jump; 836 837 return 0; 838 } 839 840 static int 841 emit_return(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 842 { 843 assert(stmt->type == SCR_STMT_RET); 844 assert(is_primitive_typeinfo(stmt->ret.expr->typeinfo)); 845 846 if (emit_expr(ctx, scratch, stmt->ret.expr) < 0) 847 return -1; 848 849 scratch->opcode = SCR_IR_RET; 850 scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->ret.expr->typeinfo); 851 scratch->operand_count = 0; 852 853 if (!ir_push(&ctx->ir, scratch)) 854 return -1; 855 856 return 0; 857 } 858 859 static int 860 emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt) 861 { 862 switch (stmt->type) { 863 case SCR_STMT_BLOCK: return emit_block(ctx, scratch, stmt); 864 case SCR_STMT_DECL: return emit_declstmt(ctx, scratch, stmt); 865 case SCR_STMT_EXPR: return emit_expr(ctx, scratch, stmt->expr); 866 case SCR_STMT_IF_ELSE: return emit_if_else(ctx, scratch, stmt); 867 case SCR_STMT_WHILE_LOOP: return emit_while_loop(ctx, scratch, stmt); 868 case SCR_STMT_RET: return emit_return(ctx, scratch, stmt); 869 } 870 871 return -1; 872 } 873 874 static int 875 parse(struct compile_ctx *ctx) 876 { 877 ctx->ast.roots.head = ctx->ast.roots.tail = NULL; 878 879 struct script_ir_inst inst; 880 while (peek(ctx, 0).type != SCR_TOKEN_EOF) { 881 struct script_stmt *stmt = parse_statement(ctx, NULL); 882 list_push_tail(&ctx->ast.roots, &stmt->list_node); 883 884 if (emit(ctx, &inst, stmt) < 0) { 885 dbglog(ctx, "error: failed to emit statement\n"); 886 return -1; 887 } 888 } 889 890 assert(ctx->stream.cur == ctx->stream.len); 891 892 return 0; 893 } 
894 895 /* libscript 896 * =========================================================================== 897 */ 898 899 int 900 script_compile(char *src, size_t src_len, void *mem, size_t mem_len, 901 struct script_program *out, FILE *errstream, int verbose) 902 { 903 assert(mem_len > sizeof(struct compile_ctx)); 904 905 struct compile_ctx *ctx = mem; 906 ctx->arena.ptr = (char *) mem + sizeof *ctx; 907 ctx->arena.cap = mem_len - sizeof *ctx; 908 ctx->arena.len = 0; 909 910 ctx->scratch.cap = SCRIPT_COMPILE_SCRATCH_BYTES; 911 ctx->scratch.ptr = ALLOC_ARRAY(&ctx->arena, char, ctx->scratch.cap); 912 ctx->scratch.len = 0; 913 914 ctx->errstream = errstream; 915 ctx->verbose = verbose; 916 ctx->src = src; 917 ctx->len = src_len; 918 919 ctx->ident_pool.cap = SCRIPT_COMPILE_MAX_IDENTS; 920 ctx->ident_pool.ptr = ALLOC_ARRAY(&ctx->arena, struct identifier, ctx->ident_pool.cap); 921 ctx->ident_pool.len = 0; 922 assert(ctx->ident_pool.ptr); 923 924 ctx->stream.cap = SCRIPT_COMPILE_MAX_TOKS; 925 ctx->stream.ptr = ALLOC_ARRAY(&ctx->arena, struct script_token, ctx->stream.cap); 926 ctx->stream.len = ctx->stream.cur = 0; 927 assert(ctx->stream.ptr); 928 929 ctx->symtab.cap = SCRIPT_COMPILE_MAX_SYMS; 930 ctx->symtab.ptr = ALLOC_ARRAY(&ctx->arena, struct script_symbol, ctx->symtab.cap); 931 ctx->symtab.len = 0; 932 assert(ctx->symtab.ptr); 933 934 ctx->ir.cap = SCRIPT_COMPILE_MAX_IR_INSTRS; 935 ctx->ir.ptr = ALLOC_ARRAY(&ctx->arena, struct script_ir_inst, ctx->ir.cap); 936 ctx->ir.len = 0; 937 assert(ctx->ir.ptr); 938 939 if (ctx->verbose) { 940 dbglog(ctx, "info: arena cap: %zu bytes, scratch cap: %zu bytes, verbose: %d\n", 941 ctx->arena.cap, ctx->scratch.cap, verbose); 942 } 943 944 int res; 945 if ((res = tokenise(ctx)) < 0) { 946 dbglog(ctx, "error: failed to tokenise source\n"); 947 return -1; 948 } 949 950 if (ctx->verbose) 951 dump_token_stream(ctx); 952 953 if ((res = parse(ctx)) < 0) { 954 dbglog(ctx, "error: failed to parse source\n"); 955 return -1; 956 } 957 958 if 
(ctx->verbose) { 959 dump_symbol_table(ctx); 960 dbglog(ctx, "\n"); 961 dump_ast(ctx); 962 dbglog(ctx, "\n"); 963 dump_ir(ctx); 964 } 965 966 out->instructions.ptr = ctx->ir.ptr; 967 out->instructions.len = ctx->ir.len; 968 out->max_heap_bytes = ctx->symtab.address; 969 970 return 0; 971 } 972 973 #include "debug.c"