commit ea67d2dce57bf5c68942c320c78790c80d7acc36
Author: Mikołaj Lenczewski <mblenczewski@gmail.com>
Date: Sun, 12 Jan 2025 20:26:49 +0000
Initial commit
Start parser for simplified script grammar, including just blocks,
variable declarations, and return statements. Start both scriptvm and
scriptcc drivers, for interpreting script via a bytecode vm and
compiling script into a native executable, respectively.
Have yet to finish the scriptvm stub to evaluate the parsed AST. Need to
track variable declaration typeinfos instead of kludg-ing with the
default typeinfo. Need to handle literal typeinfos better.
Diffstat:
15 files changed, 1364 insertions(+), 0 deletions(-)
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,17 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+
+guidelines = 80, 120, 160
+
+[*.{c,h}]
+indent_style = tab
+indent_size = 8
+
+[*.{grammar,md,txt}]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+bin/
+obj/
+out/
+
+**/.*.swp
+imgui.ini
diff --git a/README.txt b/README.txt
@@ -0,0 +1,2 @@
+script
+==============================================================================
diff --git a/build.sh b/build.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+CC="${CC:-clang}"
+AR="${AR:-llvm-ar}"
+RANLIB="${RANLIB:-llvm-ranlib}"
+
+WARNINGS="-Wall -Wextra -Wpedantic ${WERROR:+-Werror}"
+
+CFLAGS="-std=c11 -O0 -g"
+CPPFLAGS="-UNDEBUG"
+LDFLAGS=""
+
+set -ex
+
+mkdir -p bin obj
+
+$CC -o obj/libscript.o -c libscript/libscript.c \
+ $WARNINGS $CFLAGS $CPPFLAGS $LDFLAGS
+$AR rcs bin/libscript.a obj/libscript.o
+$RANLIB bin/libscript.a
+
+$CC -o bin/scriptvm scriptvm/scriptvm.c bin/libscript.a \
+ $WARNINGS $CFLAGS $CPPFLAGS -I libscript/ $LDFLAGS
+
+#$CC -o bin/scriptcc scriptcc/scriptcc.c bin/libscript.a \
+# $WARNINGS $CFLAGS $CPPFLAGS -I libscript/ $LDFLAGS
diff --git a/clean.sh b/clean.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+set -ex
+
+rm -rf bin/ obj/ out/
diff --git a/debug.sh b/debug.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# Launches one of the drivers under lldbgui against the example script.
+# Usage: debug.sh vm|cc
+
+case "$1" in
+	vm)
+		lldbgui -- bin/scriptvm -v examples/test.script
+		;;
+	cc)
+		mkdir -p out
+		lldbgui -- bin/scriptcc -v -o out/test examples/test.script
+		;;
+
+	*)
+		echo "Unknown mode: ${1:-<none>}, must be one of: vm cc"
+		exit 1
+		;;
+esac
diff --git a/docs/script.grammar b/docs/script.grammar
@@ -0,0 +1,140 @@
+program =
+ | symbol program
+ | $eof
+ ;
+
+identifier =
+ | $(sequence of alphanumeric chars or '_' not starting with a number)
+ ;
+
+literal =
+ | int-literal
+ | chr-literal
+ | str-literal
+ ;
+
+int-literal =
+ | $(optionally prefixed series of numbers)
+ ;
+
+chr-literal =
+ | '\'' $(optionally escaped ascii codepoint) '\''
+ ;
+
+str-literal =
+ | '"' $(sequence of optionally escaped ascii codepoints) '"'
+ ;
+
+symbol =
+ | type-definition
+ | fn-definition
+ | var-definition
+ ;
+
+type-definition =
+ | identifier-list '::' typename
+ ;
+
+identifier-list =
+ | identifier
+ | identifier ',' identifier-list
+ ;
+
+typename =
+ | 'utf8'
+ | 'chr8'
+ | 'u64'
+ | 's64'
+ | identifier
+ | pointer-type
+ | array-type
+ | struct-type
+ ;
+
+pointer-type =
+ | '*' typename
+ ;
+
+array-type =
+ | '[' int-literal ']' typename
+ ;
+
+struct-type =
+ | 'struct' '{' [ struct-member-list ] '}'
+ ;
+
+struct-member-list =
+ | struct-member
+ | struct-member struct-member-list
+ ;
+
+struct-member =
+ | identifier-list ':' typename ';'
+ ;
+
+var-definition =
+ | identifier-list ':' typename [ '=' initialiser ] ';'
+ ;
+
+initialiser =
+ | literal
+ | expression
+ | '.' identifier '=' initialiser
+ | '{' [ initialiser-list ] '}'
+ ;
+
+expression =
+ | literal
+ | identifier
+ ;
+
+initialiser-list =
+ | initialiser
+ | initialiser ',' initialiser-list
+ ;
+
+fn-definition =
+ | identifier '::' '(' [ fn-parameter-list ] ')' ':' fn-rettype fn-body
+ ;
+
+fn-parameter-list =
+ | fn-parameter
+ | fn-parameter ',' fn-parameter-list
+ ;
+
+fn-parameter =
+ | identifier ':' typename
+ ;
+
+fn-rettype =
+ | 'void'
+ | typename
+ ;
+
+fn-body =
+ | '{' [ statement-list ] '}'
+ ;
+
+statement-list =
+ | statement
+ | statement statement-list
+ ;
+
+statement =
+ | var-definition
+ | block-statement
+ | return-statement
+ | expr-statement
+ ;
+
+block-statement =
+ | '{' [ statement-list ] '}'
+ ;
+
+return-statement =
+ | 'return' [ expression ] ';'
+ ;
+
+expr-statement =
+ | expression ';'
+ ;
diff --git a/docs/simple.grammar b/docs/simple.grammar
@@ -0,0 +1,64 @@
+program =
+ | [ statement-list ] $eof
+ ;
+
+statement-list =
+ | statement
+ | statement statement-list
+ ;
+
+statement =
+ | null-statement
+ | decl-statement
+ | ret-statement
+ | block-statement
+ ;
+
+null-statement =
+ | ';'
+ ;
+
+decl-statement =
+ | ident ':' type '=' expr ';'
+ ;
+
+ret-statement =
+ | 'return' expr ';'
+ ;
+
+block-statement =
+ | '{' [ statement-list ] '}'
+ ;
+
+ident =
+ | ? c-style identifier ?
+ ;
+
+type =
+ | 'u64'
+ ;
+
+expr =
+ | ident
+ | literal
+ | binary-op
+ ;
+
+binary-op =
+ | expr expr binary-operator
+ ;
+
+binary-operator =
+ | '+'
+ | '-'
+ | '*'
+ | '/'
+ ;
+
+literal =
+ | integer-literal
+ ;
+
+integer-literal =
+ | ? c-style integer literal ?
+ ;
diff --git a/examples/test.script b/examples/test.script
@@ -0,0 +1,2 @@
+x : u64 = 0;
+return 42 1 + x *;
diff --git a/libscript/libscript.c b/libscript/libscript.c
@@ -0,0 +1,649 @@
+#include "libscript_internal.h"
+
+/* A flat array of tokens together with a read cursor; see peek() and next(). */
+struct token_stream {
+ struct script_token *ptr;
+ size_t len, cur; /* len: number of tokens; cur: index of the next unread token */
+};
+
+/* State shared by the lexer, the parser and the debug dumpers. */
+struct compile_ctx {
+ FILE *errstream; /* destination of everything written through dbglog() */
+ int verbose; /* non-zero enables the informational logging */
+
+ struct arena *arena; /* allocator for tokens and AST nodes */
+
+ struct token_stream *stream; /* tokens being parsed; set by script_parse() after lexing */
+
+ char scratch[64]; /* formatting buffer for dump_token() in diagnostics */
+};
+
+/*
+ * printf-style logging helper: formats `fmt` with the trailing arguments and
+ * writes the result to the context's error stream.
+ */
+static inline void
+dbglog(struct compile_ctx *ctx, char const *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	(void) vfprintf(ctx->errstream, fmt, args);
+	va_end(args);
+}
+
+/*
+ * Checks whether the lexeme [src, end) spells a keyword.
+ *
+ * Returns 0 and stores the keyword's token type in `out->type` on a match;
+ * returns -1 and leaves `out` untouched otherwise.
+ */
+static int
+try_tokenise_keyword(char *src, char *end, struct script_token *out)
+{
+	static const struct {
+		char const *text;
+		enum script_token_type type;
+	} keywords[] = {
+		{ "return", SCR_TOKEN_RETURN },
+		{ "u64", SCR_TOKEN_U64 },
+	};
+
+	size_t span = end - src;
+
+	for (size_t k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
+		if (strlen(keywords[k].text) != span)
+			continue;
+
+		if (strncmp(src, keywords[k].text, span) != 0)
+			continue;
+
+		out->type = keywords[k].type;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Splits the `src_len` bytes at `src` into tokens that are allocated
+ * back-to-back from the context's arena.
+ *
+ * The returned stream starts at the arena's base pointer and its length is
+ * derived from the arena's fill level, so the arena must contain nothing but
+ * tokens when this function returns. On a lexing error a zeroed stream
+ * (NULL `ptr`) is returned.
+ *
+ * `src` does not have to be NUL-terminated: every scan is bounded by
+ * `src + src_len`.
+ */
+static struct token_stream
+tokenise(struct compile_ctx *ctx, char *src, size_t src_len)
+{
+	if (ctx->verbose)
+		dbglog(ctx, "info: tokenise() for %zu bytes of source\n", src_len);
+
+	char *end = src + src_len;
+
+	struct script_token *token;
+
+	while (src < end) {
+		/* <ctype.h> functions require an unsigned char value */
+		unsigned char c = (unsigned char) *src;
+
+		if (isspace(c)) {
+			src++;
+			continue;
+		}
+
+		switch (c) {
+		/* these single char sequences map directly to a unique token */
+		case SCR_TOKEN_LPAREN: case SCR_TOKEN_RPAREN:
+		case SCR_TOKEN_LBRACK: case SCR_TOKEN_RBRACK:
+		case SCR_TOKEN_LBRACE: case SCR_TOKEN_RBRACE:
+		case SCR_TOKEN_LANGLE: case SCR_TOKEN_RANGLE:
+		case SCR_TOKEN_LSLASH: case SCR_TOKEN_RSLASH:
+		case SCR_TOKEN_COLON: case SCR_TOKEN_SEMICOLON:
+		case SCR_TOKEN_DOT: case SCR_TOKEN_COMMA:
+		case SCR_TOKEN_PLUS: case SCR_TOKEN_MINUS:
+		case SCR_TOKEN_STAR: case SCR_TOKEN_EQUALS:
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			token->type = (enum script_token_type) c;
+			src++;
+			break;
+
+		/* TODO: multi-char sequences map to unique tokens */
+		/* TODO: a string literal */
+		/* TODO: a character literal */
+
+		/* an integer literal or float literal (TODO) */
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9': {
+			/* strtoull() needs a NUL-terminated string, which the
+			 * source buffer is not guaranteed to be, so the literal
+			 * is parsed from a bounded, terminated copy. The copy
+			 * covers the alphanumeric run, a superset of whatever
+			 * strtoull() can consume after a leading digit. */
+			char digits[64];
+			size_t run = 0;
+
+			while (src + run < end && isalnum((unsigned char) src[run]))
+				run++;
+
+			if (run >= sizeof digits) {
+				dbglog(ctx, "error: integer literal is too long: '%.*s...'\n",
+					(int) (sizeof digits - 1), src);
+				goto error;
+			}
+
+			memcpy(digits, src, run);
+			digits[run] = '\0';
+
+			/* errno only describes this call if it is cleared first */
+			char *digits_end = digits;
+			errno = 0;
+			uint64_t value = strtoull(digits, &digits_end, 0);
+			int err = errno;
+
+			size_t consumed = (size_t) (digits_end - digits);
+
+			if (ctx->verbose)
+				dbglog(ctx, "info: have integer literal: '%.*s'\n",
+					(int) consumed, src);
+
+			if (err == EINVAL) {
+				dbglog(ctx, "error: integer literal is invalid: '%.*s'\n",
+					(int) consumed, src);
+				goto error;
+			} else if (err == ERANGE) {
+				dbglog(ctx, "warn: integer literal is out of range, truncating: "
+					"'%.*s'\n", (int) consumed, src);
+			}
+
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			token->type = SCR_TOKEN_LITERAL_INT;
+			token->literal_int = value;
+
+			/* the first character is a digit, so consumed >= 1 */
+			src += consumed;
+		} break;
+
+		/* anything else is a fragment of a ident or keyword */
+		default: {
+			if (!isalnum(c) && c != '_') {
+				dbglog(ctx, "error: unexpected character: %c\n", c);
+				goto error;
+			}
+
+			char *word = src, *word_end = src;
+			while (word_end < end &&
+			       (isalnum((unsigned char) *word_end) || *word_end == '_'))
+				word_end++;
+
+			if (ctx->verbose)
+				dbglog(ctx, "info: have ident or keyword: '%.*s'\n",
+					(int) (word_end - word), word);
+
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			if (try_tokenise_keyword(word, word_end, token) < 0) {
+				token->type = SCR_TOKEN_IDENT;
+				/* placeholder so the payload is never read
+				 * uninitialised */
+				token->ident = (struct script_flystr) { 0 };
+				// TODO: get flystr
+			}
+
+			src = word_end;
+		} break;
+		}
+	}
+
+	return (struct token_stream) {
+		.ptr = ctx->arena->ptr,
+		.len = ctx->arena->len / sizeof *token,
+		.cur = 0,
+	};
+
+error:
+	return (struct token_stream) {0};
+}
+
+/*
+ * Maps a token type to a short printable name. Types without a name yield
+ * NULL.
+ */
+static inline char const *
+token_type_str(enum script_token_type type)
+{
+	switch (type) {
+	case SCR_TOKEN_EOF:		return "EOF";
+	case SCR_TOKEN_IDENT:		return "IDENT";
+	case SCR_TOKEN_LITERAL_INT:	return "LITERAL_INT";
+	case SCR_TOKEN_U64:		return "U64";
+	case SCR_TOKEN_RETURN:		return "RETURN";
+	case SCR_TOKEN_LPAREN:		return "(";
+	case SCR_TOKEN_RPAREN:		return ")";
+	case SCR_TOKEN_LBRACK:		return "[";
+	case SCR_TOKEN_RBRACK:		return "]";
+	case SCR_TOKEN_LBRACE:		return "{";
+	case SCR_TOKEN_RBRACE:		return "}";
+	case SCR_TOKEN_LANGLE:		return "<";
+	case SCR_TOKEN_RANGLE:		return ">";
+	case SCR_TOKEN_LSLASH:		return "\\";
+	case SCR_TOKEN_RSLASH:		return "/";
+	case SCR_TOKEN_COLON:		return ":";
+	case SCR_TOKEN_SEMICOLON:	return ";";
+	case SCR_TOKEN_DOT:		return ".";
+	case SCR_TOKEN_COMMA:		return ",";
+	case SCR_TOKEN_EQUALS:		return "=";
+	case SCR_TOKEN_PLUS:		return "+";
+	case SCR_TOKEN_MINUS:		return "-";
+	case SCR_TOKEN_STAR:		return "*";
+	default:			return NULL;
+	}
+}
+
+/*
+ * Formats a one-line description of `token` into `buf` (capacity `cap`).
+ * Identifier and integer-literal tokens include their payload. Returns
+ * whatever snprintf() returns.
+ */
+static inline int
+dump_token(struct script_token *token, char *buf, size_t cap)
+{
+	char const *name = token_type_str(token->type);
+
+	if (token->type == SCR_TOKEN_IDENT)
+		return snprintf(buf, cap, "Token {type: %s, ident: %" PRIu64 "}",
+			name, token->ident.v);
+
+	if (token->type == SCR_TOKEN_LITERAL_INT)
+		return snprintf(buf, cap, "Token {type: %s, literal_int: %" PRIu64 "}",
+			name, token->literal_int);
+
+	return snprintf(buf, cap, "Token {type: %s}", name);
+}
+
+/*
+ * Logs a header with the token count, then one tab-indented line per token,
+ * then a blank line.
+ */
+static void
+dump_token_stream(struct compile_ctx *ctx, struct script_token *toks, size_t len)
+{
+	char line[64];
+
+	dbglog(ctx, "token stream: %zu tokens\n", len);
+
+	for (size_t idx = 0; idx < len; idx++) {
+		int nchars = dump_token(&toks[idx], line, sizeof line);
+		assert(nchars);
+
+		dbglog(ctx, "\t%.*s\n", nchars, line);
+	}
+
+	dbglog(ctx, "\n");
+}
+
+/*
+ * Returns the token `off` positions ahead of the cursor without consuming
+ * it. Looking at or past the end of the stream yields an EOF token.
+ */
+static inline struct script_token
+peek(struct compile_ctx *ctx, size_t off)
+{
+	struct token_stream *ts = ctx->stream;
+	size_t idx = ts->cur + off;
+
+	if (idx >= ts->len)
+		return (struct script_token) { .type = SCR_TOKEN_EOF, };
+
+	if (ctx->verbose)
+		dbglog(ctx, "peek(%zu/%zu), %d\n", idx, ts->len, ts->ptr[idx].type);
+
+	return ts->ptr[idx];
+}
+
+/*
+ * Consumes and returns the token under the cursor. Once the cursor has
+ * reached the end of the stream an EOF token is returned and the cursor is
+ * left where it is.
+ */
+static inline struct script_token
+next(struct compile_ctx *ctx)
+{
+	/* valid indices are [0, len): cur == len is already exhausted */
+	if (ctx->stream->cur >= ctx->stream->len)
+		return (struct script_token) { .type = SCR_TOKEN_EOF, };
+
+	if (ctx->verbose)
+		dbglog(ctx, "next(%zu/%zu), %d\n", ctx->stream->cur, ctx->stream->len,
+			ctx->stream->ptr[ctx->stream->cur].type);
+
+	return ctx->stream->ptr[ctx->stream->cur++];
+}
+
+/*
+ * Consumes the next token and returns it. A token whose type differs from
+ * `expected` is reported on the error stream and followed by PANIC().
+ */
+static inline struct script_token
+expect(struct compile_ctx *ctx, enum script_token_type expected)
+{
+	struct script_token got = next(ctx);
+
+	if (ctx->verbose)
+		dbglog(ctx, "expect(%zu/%zu, T: %d), %d\n",
+			ctx->stream->cur, ctx->stream->len, expected, got.type);
+
+	if (got.type == expected)
+		return got;
+
+	int nchars = dump_token(&got, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected %s, got: %.*s\n",
+		token_type_str(expected), nchars, ctx->scratch);
+	PANIC();
+
+	return got;
+}
+
+/*
+ * Returns the shared, statically allocated typeinfo of a primitive type.
+ * The same pointer is returned for every call with the same type, so
+ * typeinfos of primitives can be compared by address.
+ */
+static struct script_typeinfo *
+primitive_typeinfo(enum script_type type)
+{
+	/* indexed by enum script_type, not by token type */
+	static struct script_typeinfo typeinfos[] = {
+		[SCR_TYPE_U64] = { .type = SCR_TYPE_U64, .size = 8, .alignment = 8, },
+	};
+
+	return &typeinfos[type];
+}
+
+/*
+ * Returns the typeinfo of a literal token type. Integer literals are
+ * currently always u64. Any other token type is reported as an error and
+ * yields NULL.
+ */
+static struct script_typeinfo *
+literal_typeinfo(struct compile_ctx *ctx, enum script_token_type type)
+{
+	// TODO: better rules surrounding literal types
+	if (type == SCR_TOKEN_LITERAL_INT)
+		return primitive_typeinfo(SCR_TYPE_U64);
+
+	dbglog(ctx, "error: invalid token type has no type info: %s\n",
+		token_type_str(type));
+
+	return NULL;
+}
+
+/*
+ * Consumes one token and returns the typeinfo it names. Only `u64` is
+ * recognised. Anything else is reported as an error and yields NULL; the
+ * offending token has been consumed by then.
+ */
+static struct script_typeinfo *
+parse_typeinfo(struct compile_ctx *ctx)
+{
+	struct script_token tok = next(ctx);
+
+	if (tok.type == SCR_TOKEN_U64)
+		return primitive_typeinfo(SCR_TYPE_U64);
+
+	int nchars = dump_token(&tok, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected typeinfo, got: %.*s\n",
+		nchars, ctx->scratch);
+
+	return NULL;
+}
+
+/*
+ * Parses a postfix (reverse Polish) expression using an operand stack:
+ * identifiers and integer literals are pushed, and a binary operator pops
+ * two operands and pushes the combined node. Parsing stops at the first
+ * token that cannot be part of an expression; that token is not consumed.
+ *
+ * Returns the expression at the bottom of the stack, or NULL (after logging
+ * an error) when no expression was found or the operand stack overflowed.
+ *
+ * NOTE: operands left above the bottom of the stack when parsing stops are
+ * silently ignored.
+ */
+static struct script_expr *
+parse_expr(struct compile_ctx *ctx)
+{
+	/* capacity in elements; `sizeof stack` would be a byte count */
+	enum { STACK_CAP = 128 };
+
+	struct script_expr *stack[STACK_CAP];
+	size_t i = 0;
+
+	while (i < STACK_CAP) {
+		struct script_token tok = peek(ctx, 0);
+
+		struct script_expr *expr;
+		switch (tok.type) {
+		case SCR_TOKEN_IDENT:
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			tok = next(ctx);
+
+			expr->type = SCR_EXPR_IDENT;
+			expr->ident = tok.ident;
+
+			// TODO: fetch the previously registered typeinfo?
+			expr->typeinfo = primitive_typeinfo(SCR_TYPE_U64);
+
+			break;
+
+		case SCR_TOKEN_LITERAL_INT:
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			tok = next(ctx);
+
+			expr->type = SCR_EXPR_LITERAL_INT;
+			expr->literal_int = tok.literal_int;
+			expr->typeinfo = literal_typeinfo(ctx, tok.type);
+
+			break;
+
+		case SCR_TOKEN_PLUS:
+		case SCR_TOKEN_MINUS:
+		case SCR_TOKEN_STAR:
+		case SCR_TOKEN_RSLASH:
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			expr->type = SCR_EXPR_BINARY_OP;
+			switch (next(ctx).type) {
+			case SCR_TOKEN_PLUS: expr->binary_op.type = SCR_BINARY_OP_ADD; break;
+			case SCR_TOKEN_MINUS: expr->binary_op.type = SCR_BINARY_OP_SUB; break;
+			case SCR_TOKEN_STAR: expr->binary_op.type = SCR_BINARY_OP_MUL; break;
+			case SCR_TOKEN_RSLASH: expr->binary_op.type = SCR_BINARY_OP_DIV; break;
+			default: UNREACHABLE(); break;
+			}
+
+			/* postfix: the most recently pushed operand is the rhs */
+			assert(i >= 2);
+			expr->binary_op.rhs = stack[--i];
+			expr->binary_op.lhs = stack[--i];
+
+			assert(expr->binary_op.lhs->typeinfo == expr->binary_op.rhs->typeinfo);
+			expr->typeinfo = expr->binary_op.lhs->typeinfo;
+
+			break;
+
+		default:
+			goto end;
+		}
+
+		assert(i < STACK_CAP);
+		stack[i++] = expr;
+	}
+
+	/* only reached when the loop ran out of stack space */
+	{
+		struct script_token tok = peek(ctx, 0);
+		int len = dump_token(&tok, ctx->scratch, sizeof ctx->scratch);
+		dbglog(ctx, "error: stack overflow while parsing expression: '%.*s'\n",
+			len, ctx->scratch);
+		return NULL;
+	}
+
+end:
+	/* nothing was pushed, so stack[0] holds no valid pointer */
+	if (i == 0) {
+		struct script_token tok = peek(ctx, 0);
+		int len = dump_token(&tok, ctx->scratch, sizeof ctx->scratch);
+		dbglog(ctx, "error: expected an expression, got: '%.*s'\n",
+			len, ctx->scratch);
+		return NULL;
+	}
+
+	return stack[0];
+}
+
+/*
+ * Parses `return <expr> ;` into a SCR_STMT_RET node.
+ *
+ * Returns NULL (after logging an error) when the expression cannot be
+ * parsed, so that a return node never carries a NULL expression.
+ */
+static struct script_stmt *
+parse_return(struct compile_ctx *ctx)
+{
+	struct script_stmt *stmt = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(stmt);
+
+	stmt->type = SCR_STMT_RET;
+	expect(ctx, SCR_TOKEN_RETURN);
+
+	stmt->ret.expr = parse_expr(ctx);
+	if (!stmt->ret.expr) {
+		dbglog(ctx, "error: return statement is missing a valid expression\n");
+		return NULL;
+	}
+
+	expect(ctx, SCR_TOKEN_SEMICOLON);
+
+	return stmt;
+}
+
+/*
+ * Parses `<ident> : <type> = <expr> ;` into a SCR_STMT_VARDECL node.
+ *
+ * Returns NULL when the type or the initialiser expression cannot be parsed.
+ * The declared type and the initialiser's type must be the same typeinfo.
+ */
+static struct script_stmt *
+parse_vardecl(struct compile_ctx *ctx)
+{
+	struct script_stmt *stmt = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(stmt);
+
+	stmt->type = SCR_STMT_VARDECL;
+	stmt->vardecl.ident = expect(ctx, SCR_TOKEN_IDENT).ident;
+	expect(ctx, SCR_TOKEN_COLON);
+
+	/* parse_typeinfo() has already logged the reason on failure */
+	stmt->vardecl.typeinfo = parse_typeinfo(ctx);
+	if (!stmt->vardecl.typeinfo)
+		return NULL;
+
+	expect(ctx, SCR_TOKEN_EQUALS);
+
+	stmt->vardecl.expr = parse_expr(ctx);
+	if (!stmt->vardecl.expr) {
+		dbglog(ctx, "error: variable declaration is missing a valid initialiser\n");
+		return NULL;
+	}
+
+	assert(stmt->vardecl.typeinfo == stmt->vardecl.expr->typeinfo);
+
+	expect(ctx, SCR_TOKEN_SEMICOLON);
+
+	return stmt;
+}
+
+/*
+ * Dispatches on the next token: `return` starts a return statement and an
+ * identifier starts a variable declaration. Any other token is reported as
+ * an error and yields NULL without being consumed.
+ */
+static struct script_stmt *
+parse_statement(struct compile_ctx *ctx)
+{
+	struct script_token head = peek(ctx, 0);
+
+	if (head.type == SCR_TOKEN_RETURN)
+		return parse_return(ctx);
+
+	if (head.type == SCR_TOKEN_IDENT)
+		return parse_vardecl(ctx);
+
+	int nchars = dump_token(&head, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected a statement, got: '%.*s'\n",
+		nchars, ctx->scratch);
+
+	return NULL;
+}
+
+/*
+ * Parses statements until the end of the token stream and collects them, in
+ * source order, as the children of a new SCR_STMT_BLOCK node.
+ *
+ * Returns NULL as soon as a statement fails to parse. A failed statement
+ * need not consume its token, so continuing could loop forever.
+ */
+static struct script_stmt *
+parse_statement_list(struct compile_ctx *ctx)
+{
+	struct script_stmt *stmt = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(stmt);
+
+	stmt->type = SCR_STMT_BLOCK;
+	stmt->block.children.head = stmt->block.children.tail = NULL;
+
+	while (peek(ctx, 0).type != SCR_TOKEN_EOF) {
+		struct script_stmt *child = parse_statement(ctx);
+		if (!child)
+			return NULL;
+
+		list_push_tail(&stmt->block.children, &child->list_node);
+	}
+
+	return stmt;
+}
+
+/*
+ * Parses the whole token stream into an AST rooted at a block statement.
+ * Returns NULL when parsing fails, so the caller can report the error
+ * instead of the process aborting on an assertion.
+ */
+static struct script_stmt *
+parse(struct compile_ctx *ctx)
+{
+	struct script_stmt *ast = parse_statement_list(ctx);
+	if (!ast)
+		return NULL;
+
+	/* a successful parse must have consumed every token */
+	assert(ctx->stream->cur == ctx->stream->len);
+
+	return ast;
+}
+
+/*
+ * Logs a one-line description of `typeinfo`, preceded by `indent` indent
+ * units. Types other than u64 produce no output.
+ */
+static void
+dump_typeinfo(struct compile_ctx *ctx, struct script_typeinfo *typeinfo, size_t indent)
+{
+	static char const *names[] = {
+		[SCR_TYPE_U64] = "U64",
+	};
+
+	if (typeinfo->type != SCR_TYPE_U64)
+		return;
+
+	for (size_t col = 0; col < indent; col++)
+		dbglog(ctx, " ");
+
+	dbglog(ctx, "Typeinfo { type: %s, size: %zu, alignment: %zu }\n",
+		names[typeinfo->type], typeinfo->size, typeinfo->alignment);
+}
+
+/*
+ * Logs `expr` as an indented tree: a header line naming the node, its
+ * typeinfo one level deeper and, for binary operators, the lhs and rhs
+ * sub-expressions (recursively). Unknown expression types print nothing.
+ */
+static void
+dump_expr(struct compile_ctx *ctx, struct script_expr *expr, size_t indent)
+{
+#define EMIT_INDENT(depth) \
+	do { \
+		for (size_t col = 0; col < (depth); col++) \
+			dbglog(ctx, " "); \
+	} while (0)
+
+	static char const *op_names[] = {
+		[SCR_BINARY_OP_ADD] = "+",
+		[SCR_BINARY_OP_SUB] = "-",
+		[SCR_BINARY_OP_MUL] = "*",
+		[SCR_BINARY_OP_DIV] = "/",
+	};
+
+	/* header line, specific to the node kind */
+	switch (expr->type) {
+	case SCR_EXPR_IDENT:
+		EMIT_INDENT(indent);
+		dbglog(ctx, "ident: %" PRIu64 "\n", expr->ident.v);
+		break;
+
+	case SCR_EXPR_LITERAL_INT:
+		EMIT_INDENT(indent);
+		dbglog(ctx, "literal int: %" PRIu64 "\n", expr->literal_int);
+		break;
+
+	case SCR_EXPR_BINARY_OP:
+		EMIT_INDENT(indent);
+		dbglog(ctx, "binary op: %s\n", op_names[expr->binary_op.type]);
+		break;
+
+	default:
+		return;
+	}
+
+	/* every node kind reports its typeinfo right after the header */
+	EMIT_INDENT(indent + 1);
+	dbglog(ctx, "typeinfo:\n");
+	dump_typeinfo(ctx, expr->typeinfo, indent + 2);
+
+	if (expr->type != SCR_EXPR_BINARY_OP)
+		return;
+
+	EMIT_INDENT(indent + 1);
+	dbglog(ctx, "lhs:\n");
+	dump_expr(ctx, expr->binary_op.lhs, indent + 2);
+
+	EMIT_INDENT(indent + 1);
+	dbglog(ctx, "rhs:\n");
+	dump_expr(ctx, expr->binary_op.rhs, indent + 2);
+
+#undef EMIT_INDENT
+}
+
+/*
+ * Logs the statement tree rooted at `node`, one node per line, indenting
+ * children one level deeper than their parent. Blocks recurse into their
+ * children in list order; declarations and returns print their expression.
+ */
+static void
+dump_parse_tree(struct compile_ctx *ctx, struct script_stmt *node, size_t indent)
+{
+#define EMIT_INDENT(depth) \
+	do { \
+		for (size_t col = 0; col < (depth); col++) \
+			dbglog(ctx, " "); \
+	} while (0)
+
+	switch (node->type) {
+	case SCR_STMT_BLOCK: {
+		EMIT_INDENT(indent);
+		dbglog(ctx, "block\n");
+
+		struct script_list_node *cursor = node->block.children.head;
+		for (; cursor; cursor = cursor->next) {
+			struct script_stmt *child =
+				SCRIPT_FROM_NODE(cursor, struct script_stmt, list_node);
+
+			dump_parse_tree(ctx, child, indent + 1);
+		}
+	} break;
+
+	case SCR_STMT_VARDECL: {
+		EMIT_INDENT(indent);
+		dbglog(ctx, "vardecl: ident: %" PRIu64 "\n", node->vardecl.ident.v);
+
+		EMIT_INDENT(indent + 1);
+		dbglog(ctx, "typeinfo:\n");
+		dump_typeinfo(ctx, node->vardecl.typeinfo, indent + 2);
+
+		EMIT_INDENT(indent + 1);
+		dbglog(ctx, "expr:\n");
+		dump_expr(ctx, node->vardecl.expr, indent + 2);
+	} break;
+
+	case SCR_STMT_RET: {
+		EMIT_INDENT(indent);
+		dbglog(ctx, "return\n");
+
+		EMIT_INDENT(indent + 1);
+		dbglog(ctx, "expr:\n");
+		dump_expr(ctx, node->ret.expr, indent + 2);
+	} break;
+	}
+
+#undef EMIT_INDENT
+}
+
+/*
+ * Library entry point: tokenises and parses `src_len` bytes of source,
+ * allocating all tokens and AST nodes from the caller's buffer `mem` of
+ * `mem_len` bytes.
+ *
+ * On success stores the root statement in `*out` and returns 0. On failure
+ * logs to `errstream`, leaves `*out` untouched and returns -1. A non-zero
+ * `verbose` also dumps the token stream and the parse tree.
+ */
+int
+script_parse(char *src, size_t src_len, void *mem, size_t mem_len,
+	     struct script_stmt **out, FILE *errstream, int verbose)
+{
+	struct arena arena = {
+		.ptr = mem,
+		.cap = mem_len,
+		.len = 0,
+	};
+
+	struct compile_ctx ctx = {
+		.errstream = errstream,
+		.verbose = verbose,
+		.arena = &arena,
+	};
+
+	if (verbose)
+		dbglog(&ctx, "info: arena cap: %zu bytes, verbose: %d\n",
+			arena.cap, verbose);
+
+	/* lexing */
+	struct token_stream stream = tokenise(&ctx, src, src_len);
+	if (stream.ptr == NULL) {
+		dbglog(&ctx, "error: failed to tokenise source\n");
+		return -1;
+	}
+
+	if (verbose)
+		dump_token_stream(&ctx, stream.ptr, stream.len);
+
+	/* parsing */
+	ctx.stream = &stream;
+
+	struct script_stmt *root = parse(&ctx);
+	if (root == NULL) {
+		dbglog(&ctx, "error: failed to parse source\n");
+		return -1;
+	}
+
+	if (verbose)
+		dump_parse_tree(&ctx, root, 0);
+
+	*out = root;
+
+	return 0;
+}
+
+#include "utils.c"
diff --git a/libscript/libscript.h b/libscript/libscript.h
@@ -0,0 +1,172 @@
+#ifndef LIBSCRIPT_H
+#define LIBSCRIPT_H
+
+#include <assert.h>
+#include <errno.h>
+#include <stdalign.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* utilities
+ * ===========================================================================
+ */
+
+/*
+ * Recovers a pointer to the enclosing `T` from a pointer to its `member`
+ * field. A NULL `child_ptr` yields NULL. `child_ptr` is evaluated twice.
+ */
+#define SCRIPT_TO_PARENT(child_ptr, T, member) \
+	((child_ptr) ? (T *) ((uintptr_t) (child_ptr) - offsetof(T, member)) : NULL)
+
+/* Handle for an identifier string, carried as a 64-bit value.
+ * NOTE(review): the lexer does not assign these yet (see its flystr TODO). */
+struct script_flystr {
+ uint64_t v;
+};
+
+/* Intrusive doubly-linked list link, embedded in the structs that are listed. */
+struct script_list_node {
+ struct script_list_node *prev, *next;
+};
+
+/* Converts a list link back into the struct `T` that embeds it as `member`. */
+#define SCRIPT_FROM_NODE(node, T, member) SCRIPT_TO_PARENT(node, T, member)
+
+/* Loop header walking forwards from `node`; the current link is named `it`. */
+#define SCRIPT_NODE_ITER(node) \
+ for (struct script_list_node *it = (node); it; it = it->next)
+
+/* Loop header walking backwards from `node`; the current link is named `it`. */
+#define SCRIPT_NODE_RITER(node) \
+ for (struct script_list_node *it = (node); it; it = it->prev)
+
+/* Doubly-linked list anchor holding the first and last link. */
+struct script_list {
+ struct script_list_node *head, *tail;
+};
+
+/* Iterate a whole list from head to tail, or from tail to head. */
+#define SCRIPT_LIST_ITER(list) SCRIPT_NODE_ITER((list)->head)
+#define SCRIPT_LIST_RITER(list) SCRIPT_NODE_RITER((list)->tail)
+
+/* lexer
+ * ===========================================================================
+ */
+
+/*
+ * Token kinds produced by the lexer. Punctuation tokens use the value of
+ * their own character, so the lexer can cast the source character straight
+ * to a token type. The remaining kinds take the small values from 0 upwards.
+ */
+enum script_token_type {
+ SCR_TOKEN_EOF,
+
+ /* literals */
+ SCR_TOKEN_IDENT,
+ SCR_TOKEN_LITERAL_INT,
+
+ /* keywords */
+ SCR_TOKEN_U64,
+ SCR_TOKEN_RETURN,
+
+ /* punctuation */
+ SCR_TOKEN_LPAREN = '(',
+ SCR_TOKEN_RPAREN = ')',
+ SCR_TOKEN_LBRACK = '[',
+ SCR_TOKEN_RBRACK = ']',
+ SCR_TOKEN_LBRACE = '{',
+ SCR_TOKEN_RBRACE = '}',
+
+ SCR_TOKEN_LANGLE = '<',
+ SCR_TOKEN_RANGLE = '>',
+ SCR_TOKEN_LSLASH = '\\',
+ SCR_TOKEN_RSLASH = '/',
+
+ SCR_TOKEN_COLON = ':',
+ SCR_TOKEN_SEMICOLON = ';',
+ SCR_TOKEN_DOT = '.',
+ SCR_TOKEN_COMMA = ',',
+
+ SCR_TOKEN_EQUALS = '=',
+ SCR_TOKEN_PLUS = '+',
+ SCR_TOKEN_MINUS = '-',
+ SCR_TOKEN_STAR = '*',
+};
+
+/* A single lexed token; `type` selects which union member, if any, is meaningful. */
+struct script_token {
+ enum script_token_type type;
+
+ union {
+ struct script_flystr ident; /* payload of SCR_TOKEN_IDENT */
+ uint64_t literal_int; /* payload of SCR_TOKEN_LITERAL_INT */
+ };
+};
+
+/* parser
+ * ===========================================================================
+ */
+
+/* The kinds of type the language knows about. */
+enum script_type {
+ SCR_TYPE_U64,
+};
+
+/* Description of a type: its kind plus its size and alignment
+ * (presumably in bytes -- confirm). */
+struct script_typeinfo {
+ enum script_type type;
+
+ size_t size;
+ size_t alignment;
+};
+
+/* Discriminator for struct script_expr. */
+enum script_expr_type {
+ SCR_EXPR_IDENT,
+ SCR_EXPR_LITERAL_INT,
+ SCR_EXPR_BINARY_OP,
+};
+
+/* An expression node; `type` selects the active union member. */
+struct script_expr {
+ enum script_expr_type type;
+
+ /* type of the value this expression evaluates to */
+ struct script_typeinfo *typeinfo;
+
+ union {
+ struct script_flystr ident; /* SCR_EXPR_IDENT */
+
+ uint64_t literal_int; /* SCR_EXPR_LITERAL_INT */
+
+ /* SCR_EXPR_BINARY_OP: operator and its two operand sub-expressions */
+ struct {
+ enum {
+ SCR_BINARY_OP_ADD,
+ SCR_BINARY_OP_SUB,
+ SCR_BINARY_OP_MUL,
+ SCR_BINARY_OP_DIV,
+ } type;
+
+ struct script_expr *lhs, *rhs;
+ } binary_op;
+ };
+};
+
+/* Discriminator for struct script_stmt. */
+enum script_stmt_type {
+ SCR_STMT_BLOCK,
+ SCR_STMT_VARDECL,
+ SCR_STMT_RET,
+};
+
+/* A statement node; `type` selects the active union member. */
+struct script_stmt {
+ enum script_stmt_type type;
+
+ union {
+ /* SCR_STMT_BLOCK: child statements, linked through their list_node */
+ struct {
+ struct script_list children;
+ } block;
+
+ /* SCR_STMT_VARDECL: declared name, declared type and initialiser */
+ struct {
+ struct script_flystr ident;
+ struct script_typeinfo *typeinfo;
+ struct script_expr *expr;
+ } vardecl;
+
+ /* SCR_STMT_RET: the returned expression */
+ struct {
+ struct script_expr *expr;
+ } ret;
+ };
+
+ /* link used when this statement is a child of a block */
+ struct script_list_node list_node;
+};
+
+/* libscript
+ * ===========================================================================
+ */
+
+/*
+ * Tokenises and parses `src_len` bytes of source at `src`. All tokens and
+ * AST nodes are allocated from the caller-provided buffer `mem` of `mem_len`
+ * bytes, so the AST stays valid only as long as that buffer does.
+ *
+ * Returns 0 and stores the root statement in `*out` on success; returns -1
+ * after logging to `errstream` on failure. A non-zero `verbose` additionally
+ * logs the token stream and the parse tree to `errstream`.
+ */
+extern int
+script_parse(char *src, size_t src_len, void *mem, size_t mem_len,
+ struct script_stmt **out, FILE *errstream, int verbose);
+
+#endif /* LIBSCRIPT_H */
diff --git a/libscript/libscript_internal.h b/libscript/libscript_internal.h
@@ -0,0 +1,95 @@
+#ifndef LIBSCRIPT_INTERNAL_H
+#define LIBSCRIPT_INTERNAL_H
+
+#include "libscript.h"
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <limits.h>
+
+/* Deliberately stores through a volatile null pointer; used to mark code
+ * paths that must never execute. */
+#define UNREACHABLE() (*((volatile char *) 0) = 0)
+/* Fatal error: currently the same as UNREACHABLE(). */
+#define PANIC() UNREACHABLE()
+
+/* Round `v` down / up to a multiple of `align`. The bit-mask form only gives
+ * a correct result when `align` is a power of two. */
+#define ALIGN_PREV(v, align) ((v) & ~((align) - 1))
+#define ALIGN_NEXT(v, align) ALIGN_PREV((v) + ((align) - 1), (align))
+
+/* Bump allocator over a caller-provided buffer. */
+struct arena {
+ void *ptr; /* start of the buffer */
+ size_t cap, len; /* cap: total bytes available; len: bytes handed out so far */
+};
+
+/* Marks the arena as empty again. The buffer contents are left as they are,
+ * so later allocations may return memory holding old data. */
+inline void
+arena_reset(struct arena *arena)
+{
+ arena->len = 0;
+}
+
+/*
+ * Allocates `size` bytes from the arena at an offset that is a multiple of
+ * `align` (which must be a power of two). The offset is aligned relative to
+ * the start of the buffer, so the buffer itself must be suitably aligned.
+ *
+ * Returns NULL when the request does not fit. The returned memory is not
+ * cleared.
+ */
+inline void *
+arena_alloc(struct arena *arena, size_t size, size_t align)
+{
+	size_t aligned_len = ALIGN_NEXT(arena->len, align);
+
+	/* the checks are ordered so that none of them can wrap around */
+	if (aligned_len < arena->len || aligned_len > arena->cap)
+		return NULL;
+
+	if (size > arena->cap - aligned_len)
+		return NULL;
+
+	void *ptr = (void *) ((uintptr_t) arena->ptr + aligned_len);
+	arena->len = aligned_len + size;
+
+	return ptr;
+}
+
+/* Allocate room for `n` objects of type `T`, aligned for `T`; NULL on failure. */
+#define ALLOC_ARRAY(arena, T, n) \
+ arena_alloc((arena), sizeof(T) * (n), alignof(T))
+
+/* Allocate room for a single object of type `T`. */
+#define ALLOC_SIZED(arena, T) ALLOC_ARRAY((arena), T, 1)
+
+/*
+ * Inserts `node` at the front of `list`. Both links of `node` are written,
+ * so the node may come from uninitialised or recycled memory.
+ */
+inline void
+list_push_head(struct script_list *restrict list,
+	       struct script_list_node *restrict node)
+{
+	if (!list->tail)
+		list->tail = node;
+
+	if (list->head)
+		list->head->prev = node;
+
+	/* the new head has no predecessor */
+	node->prev = NULL;
+	node->next = list->head;
+	list->head = node;
+}
+
+/*
+ * Appends `node` to the back of `list`. Both links of `node` are written,
+ * so the node may come from uninitialised or recycled memory.
+ */
+inline void
+list_push_tail(struct script_list *restrict list,
+	       struct script_list_node *restrict node)
+{
+	if (!list->head)
+		list->head = node;
+
+	if (list->tail)
+		list->tail->next = node;
+
+	/* the new tail has no successor */
+	node->next = NULL;
+	node->prev = list->tail;
+	list->tail = node;
+}
+
+/*
+ * Removes and returns the first node of `list`, or NULL when the list is
+ * empty. The returned node is fully unlinked and the list is left
+ * consistent, including when it becomes empty.
+ */
+inline struct script_list_node *
+list_pop_head(struct script_list *list)
+{
+	struct script_list_node *node = list->head;
+	if (!node)
+		return NULL;
+
+	list->head = node->next;
+
+	if (list->head)
+		list->head->prev = NULL;
+	else
+		list->tail = NULL; /* that was the last node */
+
+	node->prev = node->next = NULL;
+
+	return node;
+}
+
+/*
+ * Removes and returns the last node of `list`, or NULL when the list is
+ * empty. The returned node is fully unlinked and the list is left
+ * consistent, including when it becomes empty.
+ */
+inline struct script_list_node *
+list_pop_tail(struct script_list *list)
+{
+	struct script_list_node *node = list->tail;
+	if (!node)
+		return NULL;
+
+	list->tail = node->prev;
+
+	if (list->tail)
+		list->tail->next = NULL;
+	else
+		list->head = NULL; /* that was the last node */
+
+	node->prev = node->next = NULL;
+
+	return node;
+}
+
+#endif /* LIBSCRIPT_INTERNAL_H */
diff --git a/libscript/utils.c b/libscript/utils.c
@@ -0,0 +1,21 @@
+#include "libscript_internal.h"
+
+/*
+ * The functions below are defined `inline` in libscript_internal.h. In C11,
+ * redeclaring them with `extern` in one translation unit makes that unit
+ * provide their external definitions, for calls that are not inlined.
+ */
+extern inline void
+arena_reset(struct arena *arena);
+
+extern inline void *
+arena_alloc(struct arena *arena, size_t size, size_t align);
+
+extern inline void
+list_push_head(struct script_list *restrict list,
+ struct script_list_node *restrict node);
+
+extern inline void
+list_push_tail(struct script_list *restrict list,
+ struct script_list_node *restrict node);
+
+extern inline struct script_list_node *
+list_pop_head(struct script_list *list);
+
+extern inline struct script_list_node *
+list_pop_tail(struct script_list *list);
diff --git a/scriptcc/scriptcc.c b/scriptcc/scriptcc.c
@@ -0,0 +1,7 @@
+#include "libscript.h"
+
+/* Stub entry point of the native compiler driver; it does nothing yet. */
+int
+main(int argc, char **argv)
+{
+	/* not used yet; referenced to keep -Wextra quiet */
+	(void) argc;
+	(void) argv;
+
+	return 0;
+}
diff --git a/scriptvm/scriptvm.c b/scriptvm/scriptvm.c
@@ -0,0 +1,142 @@
+#include "libscript.h"
+
+#include <stdio.h>
+#include <getopt.h>
+
+#define MiB (1024 * 1024)
+
+/* Command-line options, filled in by parse_opts(). */
+struct {
+ int verbose; /* set by -v */
+ FILE *logfile; /* set by -f; parse_opts() falls back to stderr */
+ uint64_t mem; /* size in bytes of the buffer handed to script_parse() */
+
+ /* the non-option arguments: a view into argv */
+ struct {
+ char **ptr;
+ size_t len;
+ } sources;
+} opts = {
+ .verbose = 0,
+ .logfile = NULL,
+ .mem = 8192 * 1024, /* 8 MiB */
+
+ .sources.ptr = NULL,
+ .sources.len = 0,
+};
+
+#define OPTSTR "hvf:m:"
+
+/* Prints the command-line synopsis and option descriptions to stderr. */
+static void
+usage(char *prog)
+{
+	fprintf(stderr, "Usage: %s [-hv] [-f <logfile>] [-m <mem-cap-mib>] sources...\n", prog);
+	fprintf(stderr, "\t-h : display usage information\n");
+	fprintf(stderr, "\t-v : enable verbose logging\n");
+	fprintf(stderr, "\t-f : file to log compilation errors to (default: stderr)\n");
+	fprintf(stderr, "\t-m : maximum memory for compilation, in MiB (default: 8 MiB)\n");
+	fprintf(stderr, "\tsources... : the source files to interpret\n");
+}
+
+/*
+ * Parses the command line into `opts`.
+ *
+ * Returns 0 on success and -1 on an invalid option, an unusable option
+ * argument or a missing source file list. `-h` prints the usage text and
+ * exits successfully.
+ */
+static int
+parse_opts(int argc, char **argv)
+{
+	int opt;
+	while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
+		switch (opt) {
+		case 'h':
+			usage(argv[0]);
+			exit(EXIT_SUCCESS);
+
+		case 'v':
+			opts.verbose = 1;
+			break;
+
+		case 'f':
+			/* a repeated -f replaces the previously opened file */
+			if (opts.logfile)
+				fclose(opts.logfile);
+
+			if (!(opts.logfile = fopen(optarg, "w+"))) {
+				fprintf(stderr, "Failed to open logfile: %s\n", optarg);
+				return -1;
+			}
+			break;
+
+		case 'm': {
+			char *end = NULL;
+
+			errno = 0;
+			unsigned long long mib = strtoull(optarg, &end, 0);
+
+			/* reject non-numbers, trailing junk, zero, and values
+			 * whose size in bytes would not fit in size_t */
+			if (errno || end == optarg || *end != '\0' ||
+			    mib == 0 || mib > SIZE_MAX / MiB) {
+				fprintf(stderr, "Failed to parse memory limit: %s\n", optarg);
+				return -1;
+			}
+
+			opts.mem = mib * MiB;
+		} break;
+
+		default:
+			return -1;
+		}
+	}
+
+	if (!opts.logfile)
+		opts.logfile = stderr;
+
+	opts.sources.ptr = argv + optind;
+	opts.sources.len = argc - optind;
+
+	if (!opts.sources.len) {
+		fprintf(stderr, "Failed to provide source files\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Placeholder for the interpreter: the AST is accepted but not evaluated yet. */
+static void
+interpret(struct script_stmt *ast)
+{
+ (void) ast;
+
+ // TODO: interpret it
+}
+
+/*
+ * Driver: parses the options, then reads, parses and interprets every source
+ * file in turn, reusing one parser memory buffer for all of them.
+ *
+ * A file that cannot be read or parsed is reported and skipped. The exit
+ * status is EXIT_FAILURE if any file failed and EXIT_SUCCESS otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+	if (parse_opts(argc, argv)) {
+		usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	void *mem = malloc(opts.mem);
+	assert(mem);
+
+	size_t mem_len = opts.mem;
+
+	int status = EXIT_SUCCESS;
+
+	// TODO: mmap files in? read them into a single buffer?
+	for (size_t i = 0; i < opts.sources.len; i++) {
+		char *source = opts.sources.ptr[i];
+		fprintf(stderr, "Interpreting source file: %s\n", source);
+
+		/* the buffer is shared by all files: clear what the previous
+		 * file left behind */
+		memset(mem, 0, mem_len);
+
+		FILE *fp = fopen(source, "r");
+		if (!fp) {
+			fprintf(stderr, "Failed to open source file: %s\n", source);
+			status = EXIT_FAILURE;
+			continue;
+		}
+
+		long file_len = -1;
+		if (fseek(fp, 0, SEEK_END) == 0)
+			file_len = ftell(fp);
+
+		if (file_len < 0) {
+			fprintf(stderr, "Failed to size source file: %s\n", source);
+			fclose(fp);
+			status = EXIT_FAILURE;
+			continue;
+		}
+
+		rewind(fp);
+
+		size_t src_len = (size_t) file_len;
+
+		/* malloc(0) may return NULL, so never request zero bytes */
+		char *src = malloc(src_len ? src_len : 1);
+		assert(src);
+
+		size_t nbytes = fread(src, 1, src_len, fp);
+		fclose(fp);
+
+		if (nbytes != src_len) {
+			fprintf(stderr, "Failed to read source file: %s\n", source);
+			free(src);
+			status = EXIT_FAILURE;
+			continue;
+		}
+
+		fprintf(stderr, "\tRead %zu bytes of file, parsing...\n", src_len);
+
+		struct script_stmt *ast;
+		if (script_parse(src, src_len, mem, mem_len, &ast,
+				 opts.logfile, opts.verbose) < 0) {
+			fprintf(stderr, "Failed to parse source file: %s\n", source);
+			free(src);
+			status = EXIT_FAILURE;
+			continue;
+		}
+
+		// emit(ast); // TODO: dump out bytecode to a file?
+		interpret(ast);
+
+		/* the AST lives in `mem` and holds no pointers into `src` */
+		free(src);
+	}
+
+	free(mem);
+
+	exit(status);
+}