commit 68fbc5a18539ff8efda3b3c6e207a59b8040d63d
parent 6a5f2ced150f268918dd720df39df2e303beeea9
Author: Mikołaj Lenczewski <mblenczewski@gmail.com>
Date: Tue, 21 Jan 2025 22:42:47 +0000
Add initial support for branching and looping
We now support a very basic form of branching (via an if-else
statement) and looping (via a while statement). This also brings support
for reassignment of previously declared variables. IR has been extended
to support arbitrary jumps (both unconditional, and conditional on an
accumulator being zero or nonzero).
Have yet to implement functions and their stack behaviour.
Diffstat:
12 files changed, 746 insertions(+), 220 deletions(-)
diff --git a/debug.sh b/debug.sh
@@ -2,11 +2,13 @@
case "$1" in
vm)
- lldbgui -- bin/scriptvm -v examples/test.script
+ shift
+ lldbgui -- bin/scriptvm -v $@
;;
cc)
+ shift
mkdir -p out
- lldbgui -- bin/scriptcc -v -o out/test examples/test.scipt
+ lldbgui -- bin/scriptcc -v -o out/test $@
;;
*)
diff --git a/docs/simple.grammar b/docs/simple.grammar
@@ -1,5 +1,28 @@
program =
- | [ statement-list ] $eof
+ | $eof
+ | declaration program
+ ;
+
+declaration =
+ | variable-declaration
+ | function-declaration
+ ;
+
+variable-declaration =
+ | ident ':' type '=' expr ';'
+ ;
+
+function-declaration =
+ | ident '::' '(' [ parameter-list ] ')' ':' type block-statement
+ ;
+
+parameter-list =
+ | parameter
+ | parameter ',' parameter-list
+ ;
+
+parameter =
+ | ident ':' type
;
statement-list =
@@ -8,26 +31,36 @@ statement-list =
;
statement =
- | null-statement
+ | block-statement
| decl-statement
+ | expr-statement
+ | if-else-statement
+ | while-statement
| ret-statement
- | block-statement
;
-null-statement =
- | ';'
+block-statement =
+ | '{' [ statement-list ] '}'
;
decl-statement =
| ident ':' type '=' expr ';'
;
-ret-statement =
- | 'return' ident ';'
+expr-statement =
+ | expr ';'
;
-block-statement =
- | '{' [ statement-list ] '}'
+if-else-statement =
+ | 'if' '(' expr ')' statement [ 'else' statement ]
+ ;
+
+while-statement =
+ | 'while' '(' expr ')' statement
+ ;
+
+ret-statement =
+ | 'return' expr ';'
;
ident =
@@ -35,15 +68,30 @@ ident =
;
type =
+ | 'u8'
+ | 'u16'
+ | 'u32'
| 'u64'
+ | 's8'
+ | 's16'
+ | 's32'
+ | 's64'
+ | 'f32'
+ | 'f64'
+ | 'c8'
;
expr =
| ident
| literal
+ | assignment
| binary-op
;
+assignment =
+ | ident '=' expr
+ ;
+
binary-op =
| expr expr binary-operator
;
diff --git a/examples/branch.script b/examples/branch.script
@@ -0,0 +1,8 @@
+x : u64 = 0;
+y : u64 = 1;
+
+if (x y ==) {
+ return 1;
+} else {
+ return 2;
+}
diff --git a/examples/complex.script b/examples/complex.script
@@ -0,0 +1,27 @@
+rec_fact :: (n: u64) : u64 {
+ if (n 0 == ) {
+ return 1;
+ }
+
+ return n fact(n - 1) *;
+}
+
+iter_fact :: (n: u64) : u64 {
+ i: u64 = 1;
+
+ while (n) {
+ i = i n *;
+ n = n 1 -;
+ }
+
+ return i;
+}
+
+main :: () : s32 {
+ n : u64 = 10;
+
+ x : u64 = rec_fact(n);
+ y : u64 = iter_fact(n);
+
+ return x y ==;
+}
diff --git a/examples/expr.script b/examples/expr.script
@@ -1 +1,14 @@
-return 8 8 * 64 / 4 + 2 -;
+x : u64 = 1;
+y : u64 = 2;
+z : u64 = 0;
+
+z = y x *;
+
+w : u64 = 8 8 * 16 / 4 +;
+
+i : u64 = 2;
+i = i i *;
+i = i i *;
+i = i i *;
+
+return w z -;
diff --git a/examples/func.script b/examples/func.script
@@ -0,0 +1,6 @@
+func :: () : u64 {
+ return 0;
+}
+
+x : u64 = func();
+return x;
diff --git a/examples/loop.script b/examples/loop.script
@@ -0,0 +1,9 @@
+x : u64 = 10;
+y : u64 = 1;
+
+while (x) {
+ y = y x *;
+ x = x 1 -;
+}
+
+return y;
diff --git a/libscript/debug.c b/libscript/debug.c
@@ -1,50 +1,58 @@
#include "libscript_internal.h"
-static char const *
-dump_token_type_str(enum script_token_type type)
+char const *
+script_token_type_str(enum script_token_type type)
{
- static char const *type_to_str[] = {
- [SCR_TOKEN_EOF] = "EOF",
- [SCR_TOKEN_IDENT] = "IDENT",
- [SCR_TOKEN_LITERAL_INT] = "LITERAL_INT",
-
- [SCR_TOKEN_U8] = "U8",
- [SCR_TOKEN_U16] = "U16",
- [SCR_TOKEN_U32] = "U32",
- [SCR_TOKEN_U64] = "U64",
-
- [SCR_TOKEN_S8] = "S8",
- [SCR_TOKEN_S16] = "S16",
- [SCR_TOKEN_S32] = "S32",
- [SCR_TOKEN_S64] = "S64",
-
- [SCR_TOKEN_F32] = "F32",
- [SCR_TOKEN_F64] = "F64",
-
- [SCR_TOKEN_C8] = "C8",
-
- [SCR_TOKEN_RETURN] = "RETURN",
- [SCR_TOKEN_LPAREN] = "(",
- [SCR_TOKEN_RPAREN] = ")",
- [SCR_TOKEN_LBRACK] = "[",
- [SCR_TOKEN_RBRACK] = "]",
- [SCR_TOKEN_LBRACE] = "{",
- [SCR_TOKEN_RBRACE] = "}",
- [SCR_TOKEN_LANGLE] = "<",
- [SCR_TOKEN_RANGLE] = ">",
- [SCR_TOKEN_LSLASH] = "\\",
- [SCR_TOKEN_RSLASH] = "/",
- [SCR_TOKEN_COLON] = ":",
- [SCR_TOKEN_SEMICOLON] = ";",
- [SCR_TOKEN_DOT] = ".",
- [SCR_TOKEN_COMMA] = ",",
- [SCR_TOKEN_EQUALS] = "=",
- [SCR_TOKEN_PLUS] = "+",
- [SCR_TOKEN_MINUS] = "-",
- [SCR_TOKEN_STAR] = "*",
- };
-
- return type_to_str[type];
+ switch (type) {
+ case SCR_TOKEN_EOF: return "EOF";
+
+ case SCR_TOKEN_LPAREN: return "(";
+ case SCR_TOKEN_RPAREN: return ")";
+ case SCR_TOKEN_LBRACK: return "[";
+ case SCR_TOKEN_RBRACK: return "]";
+ case SCR_TOKEN_LBRACE: return "{";
+ case SCR_TOKEN_RBRACE: return "}";
+ case SCR_TOKEN_LANGLE: return "<";
+ case SCR_TOKEN_RANGLE: return ">";
+ case SCR_TOKEN_LSLASH: return "\\";
+ case SCR_TOKEN_RSLASH: return "/";
+ case SCR_TOKEN_COLON: return ":";
+ case SCR_TOKEN_SEMICOLON: return ";";
+ case SCR_TOKEN_DOT: return ".";
+ case SCR_TOKEN_COMMA: return ",";
+ case SCR_TOKEN_EQUALS: return "=";
+ case SCR_TOKEN_PLUS: return "+";
+ case SCR_TOKEN_MINUS: return "-";
+ case SCR_TOKEN_STAR: return "*";
+
+ case SCR_TOKEN_DOUBLE_COLON: return "::";
+ case SCR_TOKEN_DOUBLE_EQUALS: return "==";
+
+ case SCR_TOKEN_IDENT: return "IDENT";
+ case SCR_TOKEN_LITERAL_INT: return "LITERAL_INT";
+
+ case SCR_TOKEN_U8: return "U8";
+ case SCR_TOKEN_U16: return "U16";
+ case SCR_TOKEN_U32: return "U32";
+ case SCR_TOKEN_U64: return "U64";
+
+ case SCR_TOKEN_S8: return "S8";
+ case SCR_TOKEN_S16: return "S16";
+ case SCR_TOKEN_S32: return "S32";
+ case SCR_TOKEN_S64: return "S64";
+
+ case SCR_TOKEN_F32: return "F32";
+ case SCR_TOKEN_F64: return "F64";
+
+ case SCR_TOKEN_C8: return "C8";
+
+ case SCR_TOKEN_RETURN: return "RETURN";
+ case SCR_TOKEN_IF: return "IF";
+ case SCR_TOKEN_ELSE: return "ELSE";
+ case SCR_TOKEN_WHILE: return "WHILE";
+
+ case _SCR_TOKEN_MULTICHAR: return NULL;
+ }
}
static int
@@ -52,15 +60,15 @@ dump_token(struct script_token *token, char *buf, size_t cap)
{
switch (token->type) {
case SCR_TOKEN_IDENT:
- return snprintf(buf, cap, "Token {type: %s, ident: %" PRIu64 "}",
- dump_token_type_str(token->type), token->ident.v);
+ return snprintf(buf, cap, "Token %s, %" PRIu64,
+ script_token_type_str(token->type), token->ident.v);
case SCR_TOKEN_LITERAL_INT:
- return snprintf(buf, cap, "Token {type: %s, literal_int: %" PRIu64 "}",
- dump_token_type_str(token->type), token->literal_int);
+ return snprintf(buf, cap, "Token %s, %" PRIu64,
+ script_token_type_str(token->type), token->literal_int);
default:
- return snprintf(buf, cap, "Token {type: %s}", dump_token_type_str(token->type));
+ return snprintf(buf, cap, "Token %s", script_token_type_str(token->type));
}
}
@@ -165,12 +173,24 @@ dump_expr(struct compile_ctx *ctx, struct script_expr *expr, size_t indent)
dump_typeinfo(ctx, expr->typeinfo, indent + 2);
break;
+ case SCR_EXPR_ASSIGNMENT:
+ ident_pool_get(&ctx->ident_pool, expr->assignment.ident, &str, &len);
+
+ leader(indent)
+ dbglog(ctx, "assignment: %.*s\n", (int) len, str);
+
+ leader(indent + 1)
+ dbglog(ctx, "value:\n");
+ dump_expr(ctx, expr->assignment.rhs, indent + 2);
+ break;
+
case SCR_EXPR_BINARY_OP: {
static char const *binary_op_str[] = {
[SCR_BINARY_OP_ADD] = "+",
[SCR_BINARY_OP_SUB] = "-",
[SCR_BINARY_OP_MUL] = "*",
[SCR_BINARY_OP_DIV] = "/",
+ [SCR_BINARY_OP_EQU] = "==",
};
leader(indent)
@@ -215,19 +235,56 @@ dump_stmt(struct compile_ctx *ctx, struct script_stmt *node, size_t indent)
}
break;
- case SCR_STMT_VARDECL:
- ident_pool_get(&ctx->ident_pool, node->vardecl.ident, &str, &len);
+ case SCR_STMT_DECL:
+ ident_pool_get(&ctx->ident_pool, node->decl.ident, &str, &len);
leader(indent)
- dbglog(ctx, "vardecl: ident: %.*s\n", (int) len, str);
+ dbglog(ctx, "decl: ident: %.*s\n", (int) len, str);
leader(indent + 1)
dbglog(ctx, "typeinfo:\n");
- dump_typeinfo(ctx, node->vardecl.typeinfo, indent + 2);
+ dump_typeinfo(ctx, node->decl.typeinfo, indent + 2);
leader(indent + 1)
dbglog(ctx, "expr:\n");
- dump_expr(ctx, node->vardecl.expr, indent + 2);
+ dump_expr(ctx, node->decl.expr, indent + 2);
+ break;
+
+ case SCR_STMT_EXPR:
+ leader(indent)
+ dbglog(ctx, "expr:\n");
+ dump_expr(ctx, node->expr, indent + 1);
+ break;
+
+ case SCR_STMT_IF_ELSE:
+ leader(indent)
+ dbglog(ctx, "if-else:\n");
+
+ leader(indent + 1)
+ dbglog(ctx, "cond:\n");
+ dump_expr(ctx, node->if_else.cond, indent + 2);
+
+ leader(indent + 1)
+ dbglog(ctx, "success:\n");
+ dump_stmt(ctx, node->if_else.if_body, indent + 2);
+
+ leader(indent + 1)
+ dbglog(ctx, "failure:\n");
+ if (node->if_else.else_body)
+ dump_stmt(ctx, node->if_else.else_body, indent + 2);
+ break;
+
+ case SCR_STMT_WHILE_LOOP:
+ leader(indent)
+ dbglog(ctx, "while-loop:\n");
+
+ leader(indent + 1)
+ dbglog(ctx, "cond:\n");
+ dump_expr(ctx, node->while_loop.cond, indent + 2);
+
+ leader(indent + 1)
+ dbglog(ctx, "body:\n");
+ dump_stmt(ctx, node->while_loop.while_body, indent + 2);
break;
case SCR_STMT_RET:
@@ -265,8 +322,8 @@ dump_ast(struct compile_ctx *ctx)
}
}
-static inline char const *
-dump_ir_opcode_str(enum script_ir_opcode opcode)
+char const *
+script_ir_opcode_str(enum script_ir_opcode opcode)
{
switch (opcode) {
case SCR_IR_LOAD: return "LOAD";
@@ -274,6 +331,10 @@ dump_ir_opcode_str(enum script_ir_opcode opcode)
case SCR_IR_PUSH: return "PUSH";
case SCR_IR_POP: return "POP";
case SCR_IR_RET: return "RET";
+ case SCR_IR_CMP: return "CMP";
+ case SCR_IR_JMP: return "JMP";
+ case SCR_IR_JNE: return "JNE";
+ case SCR_IR_JEQ: return "JEQ";
case SCR_IR_ADD: return "ADD";
case SCR_IR_SUB: return "SUB";
case SCR_IR_MUL: return "MUL";
@@ -281,10 +342,10 @@ dump_ir_opcode_str(enum script_ir_opcode opcode)
}
}
-static inline char const *
-dump_ir_typeinfo_str(struct script_ir_typeinfo *typeinfo)
+char const *
+script_ir_type_str(enum script_ir_type type)
{
- switch (typeinfo->type) {
+ switch (type) {
case SCR_IR_TYPE_U8: return "U8";
case SCR_IR_TYPE_U16: return "U16";
case SCR_IR_TYPE_U32: return "U32";
@@ -311,13 +372,13 @@ dump_ir_operands(struct compile_ctx *ctx, struct script_ir_operand *buf, size_t
dbglog(ctx, "LITERAL{0x%" PRIx64 "}", operand->literal);
break;
- case SCR_IR_OPERAND_POINTER:
- dbglog(ctx, "POINTER{0x%" PRIx64 "}", operand->pointer);
- break;
-
case SCR_IR_OPERAND_ADDRESS:
dbglog(ctx, "ADDRESS{0x%" PRIx64 "}", operand->address);
break;
+
+ case SCR_IR_OPERAND_OFFSET:
+ dbglog(ctx, "OFFSET{%" PRIi64 "}", operand->offset);
+ break;
}
dbglog(ctx, ", ");
@@ -331,9 +392,10 @@ dump_ir(struct compile_ctx *ctx)
for (size_t i = 0; i < ctx->ir.len; i++) {
struct script_ir_inst *inst = &ctx->ir.ptr[i];
- dbglog(ctx, "\t[%03zu] %5s<%s> ", i,
- dump_ir_opcode_str(inst->opcode),
- dump_ir_typeinfo_str(inst->typeinfo));
+ dbglog(ctx, "\t[%03zu] %5s ", i, script_ir_opcode_str(inst->opcode));
+
+ if (inst->typeinfo)
+ dbglog(ctx, "<%s> ", script_ir_type_str(inst->typeinfo->type));
dump_ir_operands(ctx, inst->operands, inst->operand_count);
diff --git a/libscript/libscript.c b/libscript/libscript.c
@@ -47,6 +47,15 @@ try_tokenise_keyword(char *src, char *end, struct script_token *out)
if (len == strlen("return") && strncmp(src, "return", len) == 0) {
out->type = SCR_TOKEN_RETURN;
return 0;
+ } else if (len == strlen("if") && strncmp(src, "if", len) == 0) {
+ out->type = SCR_TOKEN_IF;
+ return 0;
+ } else if (len == strlen("else") && strncmp(src, "else", len) == 0) {
+ out->type = SCR_TOKEN_ELSE;
+ return 0;
+ } else if (len == strlen("while") && strncmp(src, "while", len) == 0) {
+ out->type = SCR_TOKEN_WHILE;
+ return 0;
}
return -1;
@@ -80,10 +89,10 @@ tokenise(struct compile_ctx *ctx)
case SCR_TOKEN_LBRACE: case SCR_TOKEN_RBRACE:
case SCR_TOKEN_LANGLE: case SCR_TOKEN_RANGLE:
case SCR_TOKEN_LSLASH: case SCR_TOKEN_RSLASH:
- case SCR_TOKEN_COLON: case SCR_TOKEN_SEMICOLON:
+ case SCR_TOKEN_SEMICOLON:
case SCR_TOKEN_DOT: case SCR_TOKEN_COMMA:
case SCR_TOKEN_PLUS: case SCR_TOKEN_MINUS:
- case SCR_TOKEN_STAR: case SCR_TOKEN_EQUALS:
+ case SCR_TOKEN_STAR:
token = token_stream_alloc(&ctx->stream);
assert(token);
@@ -91,6 +100,32 @@ tokenise(struct compile_ctx *ctx)
goto next_char;
/* TODO: multi-char sequences map to unique tokens */
+ case SCR_TOKEN_COLON:
+ token = token_stream_alloc(&ctx->stream);
+ assert(token);
+
+ if (lookahead[1] == SCR_TOKEN_COLON) {
+ token->type = SCR_TOKEN_DOUBLE_COLON;
+ src++;
+ } else {
+ token->type = SCR_TOKEN_COLON;
+ }
+
+ goto next_char;
+
+ case SCR_TOKEN_EQUALS:
+ token = token_stream_alloc(&ctx->stream);
+ assert(token);
+
+ if (lookahead[1] == SCR_TOKEN_EQUALS) {
+ token->type = SCR_TOKEN_DOUBLE_EQUALS;
+ src++;
+ } else {
+ token->type = SCR_TOKEN_EQUALS;
+ }
+
+ goto next_char;
+
/* TODO: a string literal */
/* TODO: a character literal */
@@ -177,7 +212,7 @@ peek(struct compile_ctx *ctx, size_t off)
struct script_token tok = ctx->stream.ptr[ctx->stream.cur + off];
if (ctx->verbose)
dbglog(ctx, "info: peek(%zu/%zu) = '%s'\n", ctx->stream.cur + off,
- ctx->stream.len, dump_token_type_str(tok.type));
+ ctx->stream.len, script_token_type_str(tok.type));
return tok;
}
@@ -198,14 +233,14 @@ expect(struct compile_ctx *ctx, enum script_token_type expected)
if (ctx->verbose)
dbglog(ctx, "info: expect(%zu/%zu, '%s') = '%s'\n", ctx->stream.cur,
- ctx->stream.len, dump_token_type_str(expected),
- dump_token_type_str(tok.type));
+ ctx->stream.len, script_token_type_str(expected),
+ script_token_type_str(tok.type));
if (tok.type != expected) {
char buf[64];
int len = dump_token(&tok, buf, sizeof buf);
dbglog(ctx, "error: expected %s, got: %.*s\n",
- dump_token_type_str(expected), len, buf);
+ script_token_type_str(expected), len, buf);
PANIC();
}
@@ -246,7 +281,7 @@ literal_typeinfo(struct compile_ctx *ctx, enum script_token_type type)
default:
dbglog(ctx, "error: invalid token type has no type info: %s\n",
- dump_token_type_str(type));
+ script_token_type_str(type));
return NULL;
}
}
@@ -274,6 +309,25 @@ parse_expr(struct compile_ctx *ctx, struct script_typeinfo *expected_typeinfo)
{
(void) expected_typeinfo; /* TODO: use this hint to upcast types if needed */
+ /* special-case assignment until we have a proper pratt parser for expressions */
+ if (peek(ctx, 0).type == SCR_TOKEN_IDENT && peek(ctx, 1).type == SCR_TOKEN_EQUALS) {
+ struct script_expr *expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
+ assert(expr);
+
+ expr->type = SCR_EXPR_ASSIGNMENT;
+
+ struct script_flystr ident = expect(ctx, SCR_TOKEN_IDENT).ident;
+ struct script_symbol *sym = symbol_table_find(&ctx->symtab, ident);
+ assert(sym);
+
+ expect(ctx, SCR_TOKEN_EQUALS);
+
+ expr->assignment.ident = sym->ident;
+ expr->assignment.rhs = parse_expr(ctx, sym->variable.typeinfo);
+
+ return expr;
+ }
+
struct script_expr *stack[128];
size_t i = 0;
@@ -315,6 +369,7 @@ parse_expr(struct compile_ctx *ctx, struct script_typeinfo *expected_typeinfo)
case SCR_TOKEN_MINUS:
case SCR_TOKEN_STAR:
case SCR_TOKEN_RSLASH:
+ case SCR_TOKEN_DOUBLE_EQUALS:
expr = ALLOC_SIZED(&ctx->arena, struct script_expr);
assert(expr);
@@ -324,6 +379,10 @@ parse_expr(struct compile_ctx *ctx, struct script_typeinfo *expected_typeinfo)
case SCR_TOKEN_MINUS: expr->binary_op.type = SCR_BINARY_OP_SUB; break;
case SCR_TOKEN_STAR: expr->binary_op.type = SCR_BINARY_OP_MUL; break;
case SCR_TOKEN_RSLASH: expr->binary_op.type = SCR_BINARY_OP_DIV; break;
+
+ case SCR_TOKEN_DOUBLE_EQUALS:
+ expr->binary_op.type = SCR_BINARY_OP_EQU; break;
+
default: UNREACHABLE(); break;
}
@@ -362,26 +421,31 @@ end:
}
static struct script_stmt *
-parse_return(struct compile_ctx *ctx, struct script_symbol *parent)
-{
- (void) parent;
+parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope);
+static struct script_stmt *
+parse_block(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+{
struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
assert(stmt);
- stmt->type = SCR_STMT_RET;
- expect(ctx, SCR_TOKEN_RETURN);
- stmt->ret.expr = parse_expr(ctx, NULL);
+ stmt->type = SCR_STMT_BLOCK;
+ stmt->block.children.head = stmt->block.children.tail = NULL;
- /* TODO: validate that return type is the same as parent scope function type */
+ expect(ctx, SCR_TOKEN_LBRACE);
- expect(ctx, SCR_TOKEN_SEMICOLON);
+ while (peek(ctx, 0).type != SCR_TOKEN_RBRACE) {
+ struct script_stmt *child = parse_statement(ctx, parent_scope);
+ list_push_tail(&stmt->block.children, &child->list_node);
+ }
+
+ expect(ctx, SCR_TOKEN_RBRACE);
return stmt;
}
static struct script_stmt *
-parse_vardecl(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+parse_declstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope)
{
struct script_symbol *sym = symbol_table_push(&ctx->symtab);
assert(sym);
@@ -393,17 +457,17 @@ parse_vardecl(struct compile_ctx *ctx, struct script_symbol *parent_scope)
struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
assert(stmt);
- stmt->type = SCR_STMT_VARDECL;
- sym->ident = stmt->vardecl.ident = expect(ctx, SCR_TOKEN_IDENT).ident;
+ stmt->type = SCR_STMT_DECL;
+ sym->ident = stmt->decl.ident = expect(ctx, SCR_TOKEN_IDENT).ident;
expect(ctx, SCR_TOKEN_COLON);
- sym->variable.typeinfo = stmt->vardecl.typeinfo = parse_typeinfo(ctx);
+ sym->variable.typeinfo = stmt->decl.typeinfo = parse_typeinfo(ctx);
sym->variable.addr = symbol_table_next_addr(&ctx->symtab, sym->variable.typeinfo);
expect(ctx, SCR_TOKEN_EQUALS);
- stmt->vardecl.expr = parse_expr(ctx, sym->variable.typeinfo);
+ stmt->decl.expr = parse_expr(ctx, sym->variable.typeinfo);
- assert(stmt->vardecl.typeinfo == stmt->vardecl.expr->typeinfo);
+ assert(stmt->decl.typeinfo == stmt->decl.expr->typeinfo);
expect(ctx, SCR_TOKEN_SEMICOLON);
@@ -411,15 +475,109 @@ parse_vardecl(struct compile_ctx *ctx, struct script_symbol *parent_scope)
}
static struct script_stmt *
-parse_statement(struct compile_ctx *ctx, struct script_symbol *parent)
+parse_exprstmt(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+{
+ (void) parent_scope;
+
+ struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
+ assert(stmt);
+
+ stmt->type = SCR_STMT_EXPR;
+ stmt->expr = parse_expr(ctx, NULL);
+
+ expect(ctx, SCR_TOKEN_SEMICOLON);
+
+ return stmt;
+}
+
+static struct script_stmt *
+parse_if_else(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+{
+ struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
+ assert(stmt);
+
+ stmt->type = SCR_STMT_IF_ELSE;
+
+ expect(ctx, SCR_TOKEN_IF);
+
+ expect(ctx, SCR_TOKEN_LPAREN);
+ stmt->if_else.cond = parse_expr(ctx, NULL);
+ assert(stmt->if_else.cond);
+
+ expect(ctx, SCR_TOKEN_RPAREN);
+
+ stmt->if_else.if_body = parse_statement(ctx, parent_scope);
+ assert(stmt->if_else.if_body);
+
+ if (peek(ctx, 0).type == SCR_TOKEN_ELSE) {
+ next(ctx);
+ stmt->if_else.else_body = parse_statement(ctx, parent_scope);
+ assert(stmt->if_else.else_body);
+ }
+
+ return stmt;
+}
+
+static struct script_stmt *
+parse_while_loop(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+{
+ struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
+ assert(stmt);
+
+ stmt->type = SCR_STMT_WHILE_LOOP;
+
+ expect(ctx, SCR_TOKEN_WHILE);
+
+ expect(ctx, SCR_TOKEN_LPAREN);
+ stmt->while_loop.cond = parse_expr(ctx, NULL);
+ expect(ctx, SCR_TOKEN_RPAREN);
+
+ stmt->while_loop.while_body = parse_statement(ctx, parent_scope);
+
+ return stmt;
+}
+
+static struct script_stmt *
+parse_return(struct compile_ctx *ctx, struct script_symbol *parent_scope)
+{
+ (void) parent_scope;
+
+ struct script_stmt *stmt = ALLOC_SIZED(&ctx->arena, struct script_stmt);
+ assert(stmt);
+
+ stmt->type = SCR_STMT_RET;
+ expect(ctx, SCR_TOKEN_RETURN);
+ stmt->ret.expr = parse_expr(ctx, NULL);
+
+ /* TODO: validate that return type is the same as parent scope function type? */
+
+ expect(ctx, SCR_TOKEN_SEMICOLON);
+
+ return stmt;
+}
+
+static struct script_stmt *
+parse_statement(struct compile_ctx *ctx, struct script_symbol *parent_scope)
{
struct script_token tok = peek(ctx, 0);
switch (tok.type) {
- case SCR_TOKEN_RETURN:
- return parse_return(ctx, parent);
+ case SCR_TOKEN_LBRACE:
+ return parse_block(ctx, parent_scope);
case SCR_TOKEN_IDENT:
- return parse_vardecl(ctx, parent);
+ if (peek(ctx, 1).type == SCR_TOKEN_COLON)
+ return parse_declstmt(ctx, parent_scope);
+ else
+ return parse_exprstmt(ctx, parent_scope);
+
+ case SCR_TOKEN_IF:
+ return parse_if_else(ctx, parent_scope);
+
+ case SCR_TOKEN_WHILE:
+ return parse_while_loop(ctx, parent_scope);
+
+ case SCR_TOKEN_RETURN:
+ return parse_return(ctx, parent_scope);
default: {
char buf[64];
@@ -446,6 +604,7 @@ primitive_ir_typeinfo(enum script_ir_type type)
[SCR_IR_TYPE_F32] = { .type = SCR_IR_TYPE_F32, .size = 4, .alignment = 4, },
[SCR_IR_TYPE_F64] = { .type = SCR_IR_TYPE_F64, .size = 8, .alignment = 8, },
[SCR_IR_TYPE_C8] = { .type = SCR_IR_TYPE_C8, .size = 1, .alignment = 1, },
+ [SCR_IR_TYPE_PTR] = { .type = SCR_IR_TYPE_PTR, .size = 8, .alignment = 8, },
};
return &typeinfos[type];
@@ -469,6 +628,8 @@ typeinfo_to_ir_typeinfo(struct script_typeinfo *typeinfo)
case SCR_TYPE_F64: return primitive_ir_typeinfo(SCR_IR_TYPE_F64);
case SCR_TYPE_C8: return primitive_ir_typeinfo(SCR_IR_TYPE_C8);
+
+ // TODO: handle pointer types?
}
}
@@ -484,8 +645,8 @@ emit_expr(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script
scratch->opcode = SCR_IR_LOAD;
scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo);
- scratch->operands[0].type = SCR_IR_OPERAND_POINTER;
- scratch->operands[0].pointer = sym->variable.addr;
+ scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
+ scratch->operands[0].address = sym->variable.addr;
scratch->operand_count = 1;
break;
@@ -497,18 +658,34 @@ emit_expr(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script
scratch->operand_count = 1;
break;
- case SCR_EXPR_BINARY_OP:
- if (emit_expr(ctx, scratch, expr->binary_op.rhs) < 0)
+ case SCR_EXPR_ASSIGNMENT:
+ sym = symbol_table_find(&ctx->symtab, expr->assignment.ident);
+ assert(sym);
+ assert(sym->type == SCR_SYMBOL_VARIABLE);
+
+ if (emit_expr(ctx, scratch, expr->assignment.rhs) < 0)
return -1;
+ scratch->opcode = SCR_IR_STORE;
+ scratch->typeinfo = typeinfo_to_ir_typeinfo(sym->variable.typeinfo);
+ scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
+ scratch->operands[0].address = sym->variable.addr;
+ scratch->operand_count = 1;
+ break;
+
+ case SCR_EXPR_BINARY_OP:
if (emit_expr(ctx, scratch, expr->binary_op.lhs) < 0)
return -1;
+ if (emit_expr(ctx, scratch, expr->binary_op.rhs) < 0)
+ return -1;
+
switch (expr->binary_op.type) {
case SCR_BINARY_OP_ADD: scratch->opcode = SCR_IR_ADD; break;
case SCR_BINARY_OP_SUB: scratch->opcode = SCR_IR_SUB; break;
case SCR_BINARY_OP_MUL: scratch->opcode = SCR_IR_MUL; break;
case SCR_BINARY_OP_DIV: scratch->opcode = SCR_IR_DIV; break;
+ case SCR_BINARY_OP_EQU: scratch->opcode = SCR_IR_CMP; break;
}
scratch->typeinfo = typeinfo_to_ir_typeinfo(expr->typeinfo);
@@ -516,45 +693,51 @@ emit_expr(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script
break;
}
- return ir_push(&ctx->ir, scratch);
+ if (!ir_push(&ctx->ir, scratch))
+ return -1;
+
+ return 0;
}
static int
-emit_return(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
-{
- assert(stmt->type == SCR_STMT_RET);
- assert(is_primitive_typeinfo(stmt->ret.expr->typeinfo));
+emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt);
- if (emit_expr(ctx, scratch, stmt->ret.expr) < 0)
- return -1;
+static int
+emit_block(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
+{
+ SCRIPT_LIST_ITER(&stmt->block.children) {
+ struct script_stmt *child = SCRIPT_FROM_NODE(it, struct script_stmt, list_node);
- scratch->opcode = SCR_IR_RET;
- scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->ret.expr->typeinfo);
- scratch->operand_count = 0;
+ if (emit(ctx, scratch, child) < 0)
+ return -1;
+ }
- return ir_push(&ctx->ir, scratch);
+ return 0;
}
static int
-emit_vardecl(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
+emit_declstmt(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
{
- assert(stmt->type == SCR_STMT_VARDECL);
- assert(is_primitive_typeinfo(stmt->vardecl.typeinfo));
+ assert(stmt->type == SCR_STMT_DECL);
+ assert(is_primitive_typeinfo(stmt->decl.typeinfo));
- struct script_symbol *sym = symbol_table_find(&ctx->symtab, stmt->vardecl.ident);
+ struct script_symbol *sym = symbol_table_find(&ctx->symtab, stmt->decl.ident);
assert(sym);
assert(sym->type == SCR_SYMBOL_VARIABLE);
- if (emit_expr(ctx, scratch, stmt->vardecl.expr) < 0)
+ if (emit_expr(ctx, scratch, stmt->decl.expr) < 0)
return -1;
scratch->opcode = SCR_IR_STORE;
- scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->vardecl.typeinfo);
- scratch->operands[0].type = SCR_IR_OPERAND_POINTER;
- scratch->operands[0].pointer = sym->variable.addr;
+ scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->decl.typeinfo);
+ scratch->operands[0].type = SCR_IR_OPERAND_ADDRESS;
+ scratch->operands[0].address = sym->variable.addr;
scratch->operand_count = 1;
- return ir_push(&ctx->ir, scratch);
+ if (!ir_push(&ctx->ir, scratch))
+ return -1;
+
+ return 0;
#if 0 /* TODO: more advanced rules for emitting variables */
if (sym->parent) { /* this is a function-local variable, enable full expressions */
@@ -566,28 +749,123 @@ emit_vardecl(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct scr
}
static int
-emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt);
+emit_if_else(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
+{
+ assert(stmt->type == SCR_STMT_IF_ELSE);
+
+ if (emit_expr(ctx, scratch, stmt->if_else.cond) < 0)
+ return -1;
+
+ scratch->opcode = SCR_IR_JNE;
+ scratch->typeinfo = NULL;
+ scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
+ scratch->operands[0].offset = 0;
+ scratch->operand_count = 1;
+
+ // TODO: patch this with if_body length
+ struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch);
+ if (!cond_failed_jump)
+ return -1;
+
+ if (emit(ctx, scratch, stmt->if_else.if_body) < 0)
+ return -1;
+
+ cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir)
+ - cond_failed_jump;
+ if (!stmt->if_else.else_body)
+ goto end;
+
+ scratch->opcode = SCR_IR_JMP;
+ scratch->typeinfo = NULL;
+ scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
+ scratch->operands[0].offset = 0;
+ scratch->operand_count = 1;
+
+ struct script_ir_inst *else_block_skip = ir_push(&ctx->ir, scratch);
+ if (!else_block_skip)
+ return -1;
+
+ // if we have an else block, we must take into account the else-block-skip jump
+ cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir)
+ - cond_failed_jump;
+
+ if (emit(ctx, scratch, stmt->if_else.else_body) < 0)
+ return -1;
+
+ else_block_skip->operands[0].literal = ir_current_inst(&ctx->ir)
+ - else_block_skip;
+
+end:
+ return 0;
+}
static int
-emit_block(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
+emit_while_loop(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
{
- (void) ctx;
- (void) scratch;
- (void) stmt;
+ assert(stmt->type == SCR_STMT_WHILE_LOOP);
- /* TODO: reserve enough stack space for all locals */
- /* TODO: implement me */
+ struct script_ir_inst const *top_of_loop = ir_current_inst(&ctx->ir);
- return -1;
+ if (emit_expr(ctx, scratch, stmt->while_loop.cond) < 0)
+ return -1;
+
+ scratch->opcode = SCR_IR_JEQ;
+ scratch->typeinfo = NULL;
+ scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
+ scratch->operands[0].offset = 0;
+ scratch->operand_count = 1;
+
+ struct script_ir_inst *cond_failed_jump = ir_push(&ctx->ir, scratch);
+ if (!cond_failed_jump)
+ return -1;
+
+ if (emit(ctx, scratch, stmt->while_loop.while_body) < 0)
+ return -1;
+
+ scratch->opcode = SCR_IR_JMP;
+ scratch->typeinfo = NULL;
+ scratch->operands[0].type = SCR_IR_OPERAND_OFFSET;
+ scratch->operands[0].offset = top_of_loop - ir_current_inst(&ctx->ir);
+ scratch->operand_count = 1;
+
+ struct script_ir_inst *jump_to_top = ir_push(&ctx->ir, scratch);
+ if (!jump_to_top)
+ return -1;
+
+ cond_failed_jump->operands[0].literal = ir_current_inst(&ctx->ir) - cond_failed_jump;
+
+ return 0;
+}
+
+static int
+emit_return(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
+{
+ assert(stmt->type == SCR_STMT_RET);
+ assert(is_primitive_typeinfo(stmt->ret.expr->typeinfo));
+
+ if (emit_expr(ctx, scratch, stmt->ret.expr) < 0)
+ return -1;
+
+ scratch->opcode = SCR_IR_RET;
+ scratch->typeinfo = typeinfo_to_ir_typeinfo(stmt->ret.expr->typeinfo);
+ scratch->operand_count = 0;
+
+ if (!ir_push(&ctx->ir, scratch))
+ return -1;
+
+ return 0;
}
static int
emit(struct compile_ctx *ctx, struct script_ir_inst *scratch, struct script_stmt *stmt)
{
switch (stmt->type) {
- case SCR_STMT_BLOCK: return emit_block(ctx, scratch, stmt);
- case SCR_STMT_VARDECL: return emit_vardecl(ctx, scratch, stmt);
- case SCR_STMT_RET: return emit_return(ctx, scratch, stmt);
+ case SCR_STMT_BLOCK: return emit_block(ctx, scratch, stmt);
+ case SCR_STMT_DECL: return emit_declstmt(ctx, scratch, stmt);
+ case SCR_STMT_EXPR: return emit_expr(ctx, scratch, stmt->expr);
+ case SCR_STMT_IF_ELSE: return emit_if_else(ctx, scratch, stmt);
+ case SCR_STMT_WHILE_LOOP: return emit_while_loop(ctx, scratch, stmt);
+ case SCR_STMT_RET: return emit_return(ctx, scratch, stmt);
}
return -1;
@@ -603,8 +881,10 @@ parse(struct compile_ctx *ctx)
struct script_stmt *stmt = parse_statement(ctx, NULL);
list_push_tail(&ctx->ast.roots, &stmt->list_node);
- if (emit(ctx, &inst, stmt) < 0)
+ if (emit(ctx, &inst, stmt) < 0) {
+ dbglog(ctx, "error: failed to emit statement\n");
return -1;
+ }
}
assert(ctx->stream.cur == ctx->stream.len);
diff --git a/libscript/libscript.h b/libscript/libscript.h
@@ -48,28 +48,6 @@ struct script_list {
enum script_token_type {
SCR_TOKEN_EOF,
- /* literals */
- SCR_TOKEN_IDENT,
- SCR_TOKEN_LITERAL_INT,
-
- /* keywords */
- SCR_TOKEN_U8,
- SCR_TOKEN_U16,
- SCR_TOKEN_U32,
- SCR_TOKEN_U64,
-
- SCR_TOKEN_S8,
- SCR_TOKEN_S16,
- SCR_TOKEN_S32,
- SCR_TOKEN_S64,
-
- SCR_TOKEN_F32,
- SCR_TOKEN_F64,
-
- SCR_TOKEN_C8,
-
- SCR_TOKEN_RETURN,
-
/* punctuation */
SCR_TOKEN_LPAREN = '(',
SCR_TOKEN_RPAREN = ')',
@@ -92,8 +70,43 @@ enum script_token_type {
SCR_TOKEN_PLUS = '+',
SCR_TOKEN_MINUS = '-',
SCR_TOKEN_STAR = '*',
+
+ _SCR_TOKEN_MULTICHAR = 128, /* helper to bump enum values outside of ascii range */
+
+ /* multi-char punctuation */
+ SCR_TOKEN_DOUBLE_COLON,
+ SCR_TOKEN_DOUBLE_EQUALS,
+
+ /* literals */
+ SCR_TOKEN_IDENT,
+ SCR_TOKEN_LITERAL_INT,
+
+ /* keywords */
+ SCR_TOKEN_U8,
+ SCR_TOKEN_U16,
+ SCR_TOKEN_U32,
+ SCR_TOKEN_U64,
+
+ SCR_TOKEN_S8,
+ SCR_TOKEN_S16,
+ SCR_TOKEN_S32,
+ SCR_TOKEN_S64,
+
+ SCR_TOKEN_F32,
+ SCR_TOKEN_F64,
+
+ SCR_TOKEN_C8,
+
+ SCR_TOKEN_RETURN,
+
+ SCR_TOKEN_IF,
+ SCR_TOKEN_ELSE,
+ SCR_TOKEN_WHILE,
};
+extern char const *
+script_token_type_str(enum script_token_type type);
+
struct script_token {
enum script_token_type type;
@@ -134,6 +147,7 @@ struct script_typeinfo {
enum script_expr_type {
SCR_EXPR_IDENT,
SCR_EXPR_LITERAL_INT,
+ SCR_EXPR_ASSIGNMENT,
SCR_EXPR_BINARY_OP,
};
@@ -148,11 +162,17 @@ struct script_expr {
uint64_t literal_int;
struct {
+ struct script_flystr ident;
+ struct script_expr *rhs;
+ } assignment;
+
+ struct {
enum {
SCR_BINARY_OP_ADD,
SCR_BINARY_OP_SUB,
SCR_BINARY_OP_MUL,
SCR_BINARY_OP_DIV,
+ SCR_BINARY_OP_EQU,
} type;
struct script_expr *lhs, *rhs;
@@ -162,7 +182,10 @@ struct script_expr {
enum script_stmt_type {
SCR_STMT_BLOCK,
- SCR_STMT_VARDECL,
+ SCR_STMT_DECL,
+ SCR_STMT_EXPR,
+ SCR_STMT_IF_ELSE,
+ SCR_STMT_WHILE_LOOP,
SCR_STMT_RET,
};
@@ -178,7 +201,19 @@ struct script_stmt {
struct script_flystr ident;
struct script_typeinfo *typeinfo;
struct script_expr *expr;
- } vardecl;
+ } decl;
+
+ struct script_expr *expr;
+
+ struct {
+ struct script_expr *cond;
+ struct script_stmt *if_body, *else_body;
+ } if_else;
+
+ struct {
+ struct script_expr *cond;
+ struct script_stmt *while_body;
+ } while_loop;
struct {
struct script_expr *expr;
@@ -220,12 +255,20 @@ enum script_ir_opcode {
SCR_IR_POP,
SCR_IR_RET,
+ SCR_IR_CMP,
+ SCR_IR_JMP,
+ SCR_IR_JNE,
+ SCR_IR_JEQ,
+
SCR_IR_ADD,
SCR_IR_SUB,
SCR_IR_MUL,
SCR_IR_DIV,
};
+extern char const *
+script_ir_opcode_str(enum script_ir_opcode opcode);
+
enum script_ir_type {
SCR_IR_TYPE_U8,
SCR_IR_TYPE_U16,
@@ -245,6 +288,9 @@ enum script_ir_type {
SCR_IR_TYPE_PTR,
};
+extern char const *
+script_ir_type_str(enum script_ir_type type);
+
struct script_ir_typeinfo {
enum script_ir_type type;
size_t size, alignment;
@@ -252,16 +298,16 @@ struct script_ir_typeinfo {
enum script_ir_operand_type {
SCR_IR_OPERAND_LITERAL,
- SCR_IR_OPERAND_POINTER,
SCR_IR_OPERAND_ADDRESS,
+ SCR_IR_OPERAND_OFFSET,
};
struct script_ir_operand {
enum script_ir_operand_type type;
union {
uint64_t literal;
- uintptr_t pointer;
uintptr_t address;
+ int64_t offset;
};
};
diff --git a/libscript/libscript_internal.h b/libscript/libscript_internal.h
@@ -207,16 +207,22 @@ struct ir {
size_t cap, len;
};
-static inline int
+static inline struct script_ir_inst const *
+ir_current_inst(struct ir *ir)
+{
+ return &ir->ptr[ir->len];
+}
+
+static inline struct script_ir_inst *
ir_push(struct ir *ir, struct script_ir_inst *inst)
{
if (ir->len >= ir->cap)
- return -1;
+ return NULL;
struct script_ir_inst *instruction = &ir->ptr[ir->len++];
*instruction = *inst;
- return 0;
+ return instruction;
}
struct compile_ctx {
@@ -248,9 +254,6 @@ dbglog(struct compile_ctx *ctx, char const *fmt, ...)
va_end(va);
}
-static char const *
-dump_token_type_str(enum script_token_type type);
-
static int
dump_token(struct script_token *token, char *buf, size_t cap);
diff --git a/scriptvm/scriptvm.c b/scriptvm/scriptvm.c
@@ -116,53 +116,28 @@ parse_opts(int argc, char **argv)
static void
emit(int fd, struct script_program const *prog)
{
- static char const *opcode_str[] = {
- [SCR_IR_LOAD] = "LOAD",
- [SCR_IR_STORE] = "STORE",
- [SCR_IR_PUSH] = "PUSH",
- [SCR_IR_POP] = "POP",
- [SCR_IR_RET] = "RET",
- [SCR_IR_ADD] = "ADD",
- [SCR_IR_SUB] = "SUB",
- [SCR_IR_MUL] = "MUL",
- [SCR_IR_DIV] = "DIV",
- };
-
- static char const *typeinfo_str[] = {
- [SCR_IR_TYPE_U8] = "U8",
- [SCR_IR_TYPE_U16] = "U16",
- [SCR_IR_TYPE_U32] = "U32",
- [SCR_IR_TYPE_U64] = "U64",
- [SCR_IR_TYPE_S8] = "S8",
- [SCR_IR_TYPE_S16] = "S16",
- [SCR_IR_TYPE_S32] = "S32",
- [SCR_IR_TYPE_S64] = "S64",
- [SCR_IR_TYPE_F32] = "F32",
- [SCR_IR_TYPE_F64] = "F64",
- [SCR_IR_TYPE_C8] = "C8",
- [SCR_IR_TYPE_PTR] = "PTR",
- };
-
for (size_t i = 0; i < prog->instructions.len; i++) {
struct script_ir_inst *inst = prog->instructions.ptr + i;
- dprintf(fd, "[%03zu] %7s<%s> ",
- i, opcode_str[inst->opcode], typeinfo_str[inst->typeinfo->type]);
+ dprintf(fd, "[%03zu] %5s ", i, script_ir_opcode_str(inst->opcode));
+
+ if (inst->typeinfo)
+ dprintf(fd, "<%s> ", script_ir_type_str(inst->typeinfo->type));
for (size_t j = 0; j < inst->operand_count; j++) {
struct script_ir_operand *operand = &inst->operands[j];
switch (operand->type) {
case SCR_IR_OPERAND_LITERAL:
- dprintf(fd, "LIT{0x%" PRIx64 "}", operand->literal);
+ dprintf(fd, "LITERAL{0x%" PRIx64 "}", operand->literal);
break;
- case SCR_IR_OPERAND_POINTER:
- dprintf(fd, "PTR{0x%" PRIx64 "}", operand->pointer);
+ case SCR_IR_OPERAND_ADDRESS:
+ dprintf(fd, "ADDRESS{0x%" PRIxPTR "}", operand->address); /* address is uintptr_t, so use the matching PRIxPTR conversion */
break;
- case SCR_IR_OPERAND_ADDRESS:
- dprintf(fd, "ADDR{0x%" PRIx64 "}", operand->address);
+ case SCR_IR_OPERAND_OFFSET:
+ dprintf(fd, "OFFSET{%" PRIi64 "}", operand->offset);
break;
}
@@ -173,10 +148,12 @@ emit(int fd, struct script_program const *prog)
}
}
+typedef uint64_t reg_t;
+
struct vm_state {
- size_t pc, sp;
+ reg_t pc, sp, acc;
- uint64_t r0;
+ reg_t r0;
struct {
unsigned char *ptr;
@@ -188,12 +165,13 @@ static void
dump_vm_state(struct vm_state *vm)
{
fprintf(stderr, "vm state:\n");
- fprintf(stderr, "\tpc: 0x%" PRIx64 ", sp: 0x%" PRIu64 "\n", vm->pc, vm->sp);
+ fprintf(stderr, "\tpc: 0x%" PRIx64 ", sp: 0x%" PRIx64 ", acc: 0x%" PRIx64 "\n",
+ vm->pc, vm->sp, vm->acc);
fprintf(stderr, "\tr0: 0x%" PRIx64 "\n", vm->r0);
}
static inline uint64_t
-pop(struct vm_state *vm, struct script_ir_typeinfo *typeinfo)
+pop(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo)
{
uint64_t value = 0;
@@ -204,11 +182,13 @@ pop(struct vm_state *vm, struct script_ir_typeinfo *typeinfo)
assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
memcpy(&value, storage, typeinfo->size);
+ vm->acc = value;
+
return value;
}
static inline void
-push(struct vm_state *vm, struct script_ir_typeinfo *typeinfo, uint64_t value)
+push(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uint64_t value)
{
assert(vm->sp + typeinfo->size < vm->stack.len);
@@ -216,11 +196,14 @@ push(struct vm_state *vm, struct script_ir_typeinfo *typeinfo, uint64_t value)
assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
memcpy(storage, &value, typeinfo->size);
+ // TODO: should push update the accumulator?
+ vm->acc = value;
+
vm->sp += typeinfo->size;
}
static inline void
-load(struct vm_state *vm, struct script_ir_typeinfo *typeinfo, uintptr_t addr)
+load(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr)
{
uint64_t value = 0;
@@ -229,20 +212,25 @@ load(struct vm_state *vm, struct script_ir_typeinfo *typeinfo, uintptr_t addr)
assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
memcpy(&value, storage, typeinfo->size);
+ vm->acc = value;
+
push(vm, typeinfo, value);
}
static inline void
-store(struct vm_state *vm, struct script_ir_typeinfo *typeinfo, uintptr_t addr, uint64_t val)
+store(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr, uint64_t value)
{
assert(addr + typeinfo->size < vm->heap.len);
unsigned char *storage = vm->heap.ptr + addr;
assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
- memcpy(storage, &val, typeinfo->size);
+ memcpy(storage, &value, typeinfo->size);
+
+ // TODO: should store update the accumulator?
+ vm->acc = value;
}
static inline uint64_t
-arithmetic(enum script_ir_opcode opcode, struct script_ir_typeinfo *typeinfo,
+arithmetic(enum script_ir_opcode opcode, struct script_ir_typeinfo const *typeinfo,
uint64_t lhs, uint64_t rhs)
{
#define map(lhs, rhs, op, Tin, Tout) \
@@ -296,20 +284,20 @@ interpret(struct vm_state *vm, struct script_program *program)
struct script_ir_inst *inst = &program->instructions.ptr[vm->pc];
if (opts.verbose)
- fprintf(stderr, "inst: %d\n", inst->opcode);
+ fprintf(stderr, "inst: %s\n", script_ir_opcode_str(inst->opcode));
switch (inst->opcode) {
case SCR_IR_LOAD: {
assert(inst->operand_count);
- assert(inst->operands[0].type == SCR_IR_OPERAND_POINTER);
- load(vm, inst->typeinfo, inst->operands[0].pointer);
+ assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS);
+ load(vm, inst->typeinfo, inst->operands[0].address);
} break;
case SCR_IR_STORE: {
assert(inst->operand_count);
- assert(inst->operands[0].type == SCR_IR_OPERAND_POINTER);
+ assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS);
uint64_t value = pop(vm, inst->typeinfo);
- store(vm, inst->typeinfo, inst->operands[0].pointer, value);
+ store(vm, inst->typeinfo, inst->operands[0].address, value);
} break;
case SCR_IR_PUSH: {
@@ -328,20 +316,54 @@ interpret(struct vm_state *vm, struct script_program *program)
vm->r0 = pop(vm, inst->typeinfo);
} break;
+ case SCR_IR_CMP: {
+ assert(inst->operand_count == 0);
+ uint64_t lhs = pop(vm, inst->typeinfo);
+ uint64_t rhs = pop(vm, inst->typeinfo);
+ vm->acc = lhs - rhs;
+ } break;
+
+ case SCR_IR_JMP: { /* unconditional jump, relative to this instruction */
+ assert(inst->operand_count == 1);
+ assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
+ vm->pc += inst->operands[0].offset; /* read the member the assert checked; a negative offset wraps pc backwards */
+ goto next_vm_iter; /* skip the pc++ below: pc already points at the target */
+ } break;
+
+ case SCR_IR_JNE: { /* jump only when the accumulator is nonzero */
+ assert(inst->operand_count == 1);
+ assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
+ if (vm->acc != 0) {
+ vm->pc += inst->operands[0].offset; /* signed offset, same handling as SCR_IR_JMP */
+ goto next_vm_iter;
+ }
+ } break; /* not taken: fall out to the normal pc++ */
+
+ case SCR_IR_JEQ: { /* jump only when the accumulator is zero */
+ assert(inst->operand_count == 1);
+ assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
+ if (vm->acc == 0) {
+ vm->pc += inst->operands[0].offset; /* signed offset, same handling as SCR_IR_JMP */
+ goto next_vm_iter;
+ }
+ } break; /* not taken: fall out to the normal pc++ */
+
case SCR_IR_ADD:
case SCR_IR_SUB:
case SCR_IR_MUL:
case SCR_IR_DIV: {
assert(inst->operand_count == 0);
- uint64_t lhs = pop(vm, inst->typeinfo);
uint64_t rhs = pop(vm, inst->typeinfo);
+ uint64_t lhs = pop(vm, inst->typeinfo);
uint64_t res = arithmetic(inst->opcode, inst->typeinfo, lhs, rhs);
+ vm->acc = res;
push(vm, inst->typeinfo, res);
} break;
}
vm->pc++;
+next_vm_iter:
if (opts.verbose)
dump_vm_state(vm);
}