Initial commit - script

commit ea67d2dce57bf5c68942c320c78790c80d7acc36
Author: Mikołaj Lenczewski <mblenczewski@gmail.com>
Date:   Sun, 12 Jan 2025 20:26:49 +0000

Initial commit

Start parser for simplified script grammar, including just blocks,
variable declarations, and return statements. Start both scriptvm and
scriptcc drivers, for interpreting script via a bytecode vm and
compiling script into a native executable, respectively.

Have yet to finish the scriptvm stub to evaluate the parsed AST. Need to
track variable declaration typeinfos instead of kludging with the
default typeinfo. Need to handle literal typeinfos better.

Diffstat:
A .editorconfig  | 17 +++++++++++++++++
A .gitignore  | 6 ++++++
A README.txt  | 2 ++
A build.sh  | 26 ++++++++++++++++++++++++++
A clean.sh  | 5 +++++
A debug.sh  | 16 ++++++++++++++++
A docs/script.grammar  | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A docs/simple.grammar  | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A examples/test.script  | 2 ++
A libscript/libscript.c  | 649 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A libscript/libscript.h  | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A libscript/libscript_internal.h  | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A libscript/utils.c  | 21 +++++++++++++++++++++
A scriptcc/scriptcc.c  | 7 +++++++
A scriptvm/scriptvm.c  | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

15 files changed, 1364 insertions(+), 0 deletions(-)
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,17 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+charset = utf-8
+
+guidelines = 80, 120, 160
+
+[*.{c,h}]
+indent_style = tab
+indent_size = 8
+
+[*.{grammar,md,txt}]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,6 @@
+bin/
+obj/
+out/
+
+**/.*.swp
+imgui.ini
diff --git a/README.txt b/README.txt
@@ -0,0 +1,2 @@
+script
+==============================================================================
diff --git a/build.sh b/build.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+CC="${CC:-clang}"
+AR="${AR:-llvm-ar}"
+RANLIB="${RANLIB:-llvm-ranlib}"
+
+WARNINGS="-Wall -Wextra -Wpedantic ${WERROR:+-Werror}"
+
+CFLAGS="-std=c11 -O0 -g"
+CPPFLAGS="-UNDEBUG"
+LDFLAGS=""
+
+set -ex
+
+mkdir -p bin obj
+
+$CC -o obj/libscript.o -c libscript/libscript.c \
+	$WARNINGS $CFLAGS $CPPFLAGS $LDFLAGS
+$AR rcs bin/libscript.a obj/libscript.o
+$RANLIB bin/libscript.a
+
+$CC -o bin/scriptvm scriptvm/scriptvm.c bin/libscript.a \
+	$WARNINGS $CFLAGS $CPPFLAGS -I libscript/ $LDFLAGS
+
+#$CC -o bin/scriptcc scriptcc/scriptcc.c bin/libscript.a \
+#	$WARNINGS $CFLAGS $CPPFLAGS -I libscript/ $LDFLAGS
diff --git a/clean.sh b/clean.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+set -ex
+
+rm -rf bin/ obj/ out/
diff --git a/debug.sh b/debug.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+case "$1" in
+	vm)
+		lldbgui -- bin/scriptvm -v examples/test.script
+		;;
+	cc)
+		mkdir -p out
+		lldbgui -- bin/scriptcc -v -o out/test examples/test.scipt
+		;;
+
+	*)
+		echo "Unknown mode: ${1:-<none>}, must be one of: vm cc"
+		exit 1
+		;;
+esac
diff --git a/docs/script.grammar b/docs/script.grammar
@@ -0,0 +1,140 @@
+program =
+  | symbol program
+  | $eof
+  ;
+
+identifier =
+  | $(sequence of alphanumeric chars or '_' not starting with a number)
+  ;
+
+literal =
+  | int-literal
+  | chr-literal
+  | str-literal
+  ;
+
+int-literal =
+  | $(optionally prefixed series of numbers)
+  ;
+
+chr-literal =
+  | '\'' $(optionally escaped ascii codepoint) '\''
+  ;
+
+str-literal =
+  | '"' $(sequence of optionally escaped ascii codepoints) '"'
+  ;
+
+symbol =
+  | type-definition
+  | fn-definition
+  | var-definition
+  ;
+
+type-definition =
+  | identifier-list '::' typename
+  ;
+
+identifier-list =
+  | identifier
+  | identifier ',' identifier-list
+  ;
+
+typename =
+  | 'utf8'
+  | 'chr8'
+  | 'u64'
+  | 's64'
+  | identifier
+  | pointer-type
+  | array-type
+  | struct-type
+  ;
+
+pointer-type =
+  | '*' typename
+  ;
+
+array-type =
+  | '[' int-literal ']' typename
+  ;
+
+struct-type =
+  | 'struct' '{' [ struct-member-list ] '}'
+  ;
+
+struct-member-list =
+  | struct-member
+  | struct-member struct-member-list
+  ;
+
+struct-member =
+  | identifier-list ':' typename ';'
+  ;
+
+var-definition =
+  | identifier-list ':' typename [ '=' initialiser ] ';'
+  ;
+
+initialiser =
+  | literal
+  | expression
+  | '.' identifier '=' initialiser
+  | '{' [ initialiser-list ] '}'
+  ;
+
+expression =
+  | literal
+  | identifier
+  ;
+
+initialiser-list =
+  | initialiser
+  | initialiser ',' initialiser-list
+  ;
+
+fn-definition =
+  | identifier '::' '(' [ fn-parameter-list ] ')' ':' fn-rettype fn-body
+  ;
+
+fn-parameter-list =
+  | fn-parameter
+  | fn-parameter ',' fn-parameter-list
+  ;
+
+fn-parameter =
+  | identifier ':' typename
+  ;
+
+fn-rettype =
+  | 'void'
+  | typename
+  ;
+
+fn-body =
+  | '{' [ statement-list ] '}'
+  ;
+
+statement-list =
+  | statement
+  | statement statement-list
+  ;
+
+statement =
+  | var-definition
+  | block-statement
+  | return-statement
+  | expr-statement
+  ;
+
+block-statement =
+  | '{' [ statement-list ] '}'
+  ;
+
+return-statement =
+  | 'return' [ expression ] ';'
+  ;
+
+expr-statement =
+  | expression ';'
+  ;
diff --git a/docs/simple.grammar b/docs/simple.grammar
@@ -0,0 +1,64 @@
+program =
+  | [ statement-list ] $eof
+  ;
+
+statement-list =
+  | statement
+  | statement statement-list
+  ;
+
+statement =
+  | null-statement
+  | decl-statement
+  | ret-statement
+  | block-statement
+  ;
+
+null-statement =
+  | ';'
+  ;
+
+decl-statement =
+  | ident ':' type '=' expr ';'
+  ;
+
+ret-statement =
+  | 'return' expr ';'
+  ;
+
+block-statement =
+  | '{' [ statement-list ] '}'
+  ;
+
+ident =
+  | ? c-style identifier ?
+  ;
+
+type =
+  | 'u64'
+  ;
+
+expr =
+  | ident
+  | literal
+  | binary-op
+  ;
+
+binary-op =
+  | expr expr binary-operator
+  ;
+
+binary-operator =
+  | '+'
+  | '-'
+  | '*'
+  | '/'
+  ;
+
+literal =
+  | integer-literal
+  ;
+
+integer-literal =
+  | ? c-style integer literal ?
+  ;
diff --git a/examples/test.script b/examples/test.script
@@ -0,0 +1,2 @@
+x : u64 = 0;
+return 42 1 + x *;
diff --git a/libscript/libscript.c b/libscript/libscript.c
@@ -0,0 +1,649 @@
+#include "libscript_internal.h"
+
+/* A cursor over the contiguous array of tokens produced by tokenise(). */
+struct token_stream {
+	struct script_token *ptr;	/* first token of the array */
+	size_t len, cur;		/* token count; index of the next unread token */
+};
+
+/* State shared by the tokeniser, the parser and the dump helpers. */
+struct compile_ctx {
+	FILE *errstream;	/* destination for all dbglog() output */
+	int verbose;		/* non-zero enables the "info:" trace messages */
+
+	struct arena *arena;	/* backing storage for tokens and AST nodes */
+
+	struct token_stream *stream;	/* tokens currently being parsed */
+
+	char scratch[64];	/* buffer for formatting a token into an error message */
+};
+
+static inline void
+dbglog(struct compile_ctx *ctx, char const *fmt, ...)
+{
+	va_list va;
+	va_start(va, fmt);
+	vfprintf(ctx->errstream, fmt, va);
+	va_end(va);
+}
+
+/* Classify the text in [src, end) as a keyword.
+ *
+ * On a match the keyword's token type is stored in out->type and 0 is
+ * returned; otherwise `out` is left untouched and -1 is returned.
+ */
+static int
+try_tokenise_keyword(char *src, char *end, struct script_token *out)
+{
+	static const struct {
+		char const *text;
+		enum script_token_type type;
+	} keywords[] = {
+		{ "return",	SCR_TOKEN_RETURN },
+		{ "u64",	SCR_TOKEN_U64 },
+	};
+
+	size_t span = end - src;
+
+	for (size_t k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
+		/* the length must match exactly, so prefixes are not keywords */
+		if (span != strlen(keywords[k].text))
+			continue;
+
+		if (strncmp(src, keywords[k].text, span) != 0)
+			continue;
+
+		out->type = keywords[k].type;
+		return 0;
+	}
+
+	return -1;
+}
+
+/* Split the `src_len` bytes at `src` into tokens allocated from ctx->arena.
+ *
+ * The source does not need to be NUL-terminated: every scan is bounded by
+ * `src + src_len`. The returned stream starts at the base of the arena and
+ * its length is derived from the arena's fill level, so the arena is expected
+ * to be empty on entry. On a lexing error a zeroed stream (ptr == NULL) is
+ * returned after the error has been logged.
+ */
+static struct token_stream
+tokenise(struct compile_ctx *ctx, char *src, size_t src_len)
+{
+	if (ctx->verbose)
+		dbglog(ctx, "info: tokenise() for %zu bytes of source\n", src_len);
+
+	char *end = src + src_len;
+
+	struct script_token *token;
+	char *buf = src, *buf_end = src;
+
+	while (src < end) {
+		/* <ctype.h> functions need a value representable as unsigned char */
+		unsigned char ch = (unsigned char) src[0];
+
+		if (isspace(ch))
+			goto next_char;
+
+		switch (ch) {
+		/* these single char sequences map directly to a unique token */
+		case SCR_TOKEN_LPAREN:	case SCR_TOKEN_RPAREN:
+		case SCR_TOKEN_LBRACK:	case SCR_TOKEN_RBRACK:
+		case SCR_TOKEN_LBRACE:	case SCR_TOKEN_RBRACE:
+		case SCR_TOKEN_LANGLE:	case SCR_TOKEN_RANGLE:
+		case SCR_TOKEN_LSLASH:	case SCR_TOKEN_RSLASH:
+		case SCR_TOKEN_COLON:	case SCR_TOKEN_SEMICOLON:
+		case SCR_TOKEN_DOT:	case SCR_TOKEN_COMMA:
+		case SCR_TOKEN_PLUS:	case SCR_TOKEN_MINUS:
+		case SCR_TOKEN_STAR:	case SCR_TOKEN_EQUALS:
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			token->type = (enum script_token_type) ch;
+			goto next_char;
+
+		/* TODO: multi-char sequences map to unique tokens */
+		/* TODO: a string literal */
+		/* TODO: a character literal */
+
+		/* an integer literal or float literal (TODO) */
+		case '0': case '1': case '2': case '3': case '4':
+		case '5': case '6': case '7': case '8': case '9': {
+			/* copy the candidate literal into a NUL-terminated buffer so
+			 * that strtoull() can never read past the end of the source
+			 */
+			char digits[64];
+			size_t ndigits = 0;
+
+			while (src + ndigits < end &&
+			       isalnum((unsigned char) src[ndigits])) {
+				if (ndigits == sizeof digits - 1) {
+					dbglog(ctx, "error: integer literal is too long: "
+						    "'%.*s...'\n", (int) ndigits, src);
+					goto error;
+				}
+
+				digits[ndigits] = src[ndigits];
+				ndigits++;
+			}
+			digits[ndigits] = '\0';
+
+			/* errno is only meaningful if it was cleared beforehand */
+			char *digits_end = digits;
+			errno = 0;
+			uint64_t value = strtoull(digits, &digits_end, 0);
+			int err = errno;
+
+			/* map the consumed span back onto the source buffer */
+			buf = src;
+			buf_end = src + (digits_end - digits);
+
+			if (ctx->verbose)
+				dbglog(ctx, "info: have integer literal: '%.*s'\n",
+					    (int) (buf_end - buf), buf);
+
+			if (err == EINVAL || buf_end == buf) {
+				dbglog(ctx, "error: integer literal is invalid: '%.*s'\n",
+					    (int) (buf_end - buf), buf);
+				goto error;
+			} else if (err == ERANGE) {
+				dbglog(ctx, "warn: integer literal is out of range, truncating: "
+					    "'%.*s'\n", (int) (buf_end - buf), buf);
+			}
+
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			token->type = SCR_TOKEN_LITERAL_INT;
+			token->literal_int = value;
+
+			src = buf_end;
+			break;
+		}
+
+		/* anything else is a fragment of a ident or keyword */
+		default:
+			if (!isalnum(ch) && ch != '_') {
+				dbglog(ctx, "error: unexpected character: %c\n",
+					    src[0]);
+				goto error;
+			}
+
+			/* consume the identifier, never stepping past the source */
+			buf = buf_end = src;
+			while (buf_end < end &&
+			       (isalnum((unsigned char) *buf_end) || *buf_end == '_'))
+				buf_end++;
+
+			if (ctx->verbose)
+				dbglog(ctx, "info: have ident or keyword: '%.*s'\n",
+					    (int) (buf_end - buf), buf);
+
+			token = ALLOC_SIZED(ctx->arena, struct script_token);
+			assert(token);
+
+			if (try_tokenise_keyword(buf, buf_end, token) < 0) {
+				token->type = SCR_TOKEN_IDENT;
+				// TODO: get flystr
+			}
+
+			src = buf_end;
+
+			break;
+		}
+
+		continue;
+
+next_char:
+		src++;
+	}
+
+	return (struct token_stream) {
+		.ptr = ctx->arena->ptr,
+		.len = ctx->arena->len / sizeof *token,
+		.cur = 0,
+	};
+
+error:
+	return (struct token_stream) {0};
+}
+
+/* Return a printable name for a token type.
+ *
+ * Values that fall outside the lookup table, or into one of its gaps, yield
+ * "UNKNOWN" so the result is always safe to pass to a "%s" conversion.
+ */
+static inline char const *
+token_type_str(enum script_token_type type)
+{
+	static char const *type_to_str[] = {
+		[SCR_TOKEN_EOF]		= "EOF",
+		[SCR_TOKEN_IDENT]	= "IDENT",
+		[SCR_TOKEN_LITERAL_INT]	= "LITERAL_INT",
+		[SCR_TOKEN_U64]		= "U64",
+		[SCR_TOKEN_RETURN]	= "RETURN",
+		[SCR_TOKEN_LPAREN]	= "(",
+		[SCR_TOKEN_RPAREN]	= ")",
+		[SCR_TOKEN_LBRACK]	= "[",
+		[SCR_TOKEN_RBRACK]	= "]",
+		[SCR_TOKEN_LBRACE]	= "{",
+		[SCR_TOKEN_RBRACE]	= "}",
+		[SCR_TOKEN_LANGLE]	= "<",
+		[SCR_TOKEN_RANGLE]	= ">",
+		[SCR_TOKEN_LSLASH]	= "\\",
+		[SCR_TOKEN_RSLASH]	= "/",
+		[SCR_TOKEN_COLON]	= ":",
+		[SCR_TOKEN_SEMICOLON]	= ";",
+		[SCR_TOKEN_DOT]		= ".",
+		[SCR_TOKEN_COMMA]	= ",",
+		[SCR_TOKEN_EQUALS]	= "=",
+		[SCR_TOKEN_PLUS]	= "+",
+		[SCR_TOKEN_MINUS]	= "-",
+		[SCR_TOKEN_STAR]	= "*",
+	};
+
+	/* a negative value converts to a huge index and is rejected as well */
+	size_t idx = (size_t) type;
+	if (idx >= sizeof type_to_str / sizeof type_to_str[0] || !type_to_str[idx])
+		return "UNKNOWN";
+
+	return type_to_str[idx];
+}
+
+/* Format a human-readable description of `token` into `buf` (capacity `cap`).
+ *
+ * Returns the value of the underlying snprintf() call.
+ */
+static inline int
+dump_token(struct script_token *token, char *buf, size_t cap)
+{
+	if (token->type == SCR_TOKEN_IDENT)
+		return snprintf(buf, cap, "Token {type: %s, ident: %" PRIu64 "}",
+					  token_type_str(token->type), token->ident.v);
+
+	if (token->type == SCR_TOKEN_LITERAL_INT)
+		return snprintf(buf, cap, "Token {type: %s, literal_int: %" PRIu64 "}",
+					  token_type_str(token->type), token->literal_int);
+
+	/* every other token carries no payload */
+	return snprintf(buf, cap, "Token {type: %s}", token_type_str(token->type));
+}
+
+static void
+dump_token_stream(struct compile_ctx *ctx, struct script_token *toks, size_t len)
+{
+	struct script_token *end = toks + len;
+
+	dbglog(ctx, "token stream: %zu tokens\n", len);
+
+	char buf[64];
+	while (toks < end) {
+		int written = dump_token(toks, buf, sizeof buf);
+		assert(written);
+
+		dbglog(ctx, "\t%.*s\n", written, buf);
+
+		toks++;
+	}
+
+	dbglog(ctx, "\n");
+}
+
+/* Return the token `off` positions ahead of the cursor without consuming it.
+ *
+ * Looking at or beyond the end of the stream yields an EOF token.
+ */
+static inline struct script_token
+peek(struct compile_ctx *ctx, size_t off)
+{
+	struct token_stream *stream = ctx->stream;
+	size_t idx = stream->cur + off;
+
+	if (idx >= stream->len)
+		return (struct script_token) { .type = SCR_TOKEN_EOF, };
+
+	if (ctx->verbose)
+		dbglog(ctx, "peek(%zu/%zu), %d\n", idx,
+			    stream->len,  stream->ptr[idx].type);
+
+	return stream->ptr[idx];
+}
+
+/* Consume and return the token under the cursor.
+ *
+ * Once the cursor has reached the end of the stream an EOF token is returned
+ * and the cursor is left where it is.
+ */
+static inline struct script_token
+next(struct compile_ctx *ctx)
+{
+	/* cur == len is already one past the last token, so it must be rejected */
+	if (ctx->stream->cur >= ctx->stream->len)
+		return (struct script_token) { .type = SCR_TOKEN_EOF, };
+
+	if (ctx->verbose)
+		dbglog(ctx, "next(%zu/%zu), %d\n", ctx->stream->cur, ctx->stream->len,
+			    ctx->stream->ptr[ctx->stream->cur].type);
+
+	return ctx->stream->ptr[ctx->stream->cur++];
+}
+
+/* Consume the next token and require it to be of type `expected`.
+ *
+ * A mismatch is logged and then PANIC() is invoked.
+ */
+static inline struct script_token
+expect(struct compile_ctx *ctx, enum script_token_type expected)
+{
+	struct script_token got = next(ctx);
+
+	if (ctx->verbose)
+		dbglog(ctx, "expect(%zu/%zu, T: %d), %d\n",
+			    ctx->stream->cur, ctx->stream->len, expected, got.type);
+
+	if (got.type == expected)
+		return got;
+
+	int len = dump_token(&got, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected %s, got: %.*s\n",
+		    token_type_str(expected), len, ctx->scratch);
+	PANIC();
+
+	return got;
+}
+
+/* Return the shared typeinfo describing the primitive type `type`.
+ *
+ * The same pointer is returned for every request of a given type, which lets
+ * callers compare typeinfos by address.
+ */
+static struct script_typeinfo *
+primitive_typeinfo(enum script_type type)
+{
+	/* the table is indexed by script_type, so it must be keyed by the
+	 * SCR_TYPE_* constants (not the SCR_TOKEN_* ones)
+	 */
+	static struct script_typeinfo typeinfos[] = {
+		[SCR_TYPE_U64]	= { .type = SCR_TYPE_U64, .size = 8, .alignment = 8, },
+	};
+
+	return &typeinfos[type];
+}
+
+/* Map a literal token type onto the typeinfo of the value it produces.
+ *
+ * Returns NULL, after logging an error, for token types that are not literals.
+ */
+static struct script_typeinfo *
+literal_typeinfo(struct compile_ctx *ctx, enum script_token_type type)
+{
+	// TODO: better rules surrounding literal types
+	if (type == SCR_TOKEN_LITERAL_INT)
+		return primitive_typeinfo(SCR_TYPE_U64);
+
+	dbglog(ctx, "error: invalid token type has no type info: %s\n",
+		    token_type_str(type));
+
+	return NULL;
+}
+
+/* Consume one token and interpret it as a type name.
+ *
+ * Returns the matching typeinfo, or NULL after logging an error when the
+ * token does not name a type.
+ */
+static struct script_typeinfo *
+parse_typeinfo(struct compile_ctx *ctx)
+{
+	struct script_token name = next(ctx);
+
+	if (name.type == SCR_TOKEN_U64)
+		return primitive_typeinfo(SCR_TYPE_U64);
+
+	int len = dump_token(&name, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected typeinfo, got: %.*s\n",
+			len, ctx->scratch);
+
+	return NULL;
+}
+
+/* Parse a postfix (reverse Polish) expression into an expression tree.
+ *
+ * Operands are pushed onto a fixed-size stack; each binary operator pops its
+ * two operands and pushes the combined node. Parsing stops at the first token
+ * that cannot be part of an expression, which is left unconsumed.
+ *
+ * Returns the root of the tree, or NULL after logging an error when the
+ * expression is empty, leaves more than one value on the stack, applies an
+ * operator to fewer than two operands, or overflows the stack.
+ */
+static struct script_expr *
+parse_expr(struct compile_ctx *ctx)
+{
+	struct script_expr *stack[128];
+	size_t const cap = sizeof stack / sizeof stack[0];	/* elements, not bytes */
+	size_t i = 0;
+
+	while (i < cap) {
+		struct script_token tok = peek(ctx, 0);
+
+		struct script_expr *expr;
+		switch (tok.type) {
+		case SCR_TOKEN_IDENT:
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			tok = next(ctx);
+
+			expr->type = SCR_EXPR_IDENT;
+			expr->ident = tok.ident;
+
+			// TODO: fetch the previously registered typeinfo?
+			expr->typeinfo = primitive_typeinfo(SCR_TYPE_U64);
+
+			break;
+
+		case SCR_TOKEN_LITERAL_INT:
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			tok = next(ctx);
+
+			expr->type = SCR_EXPR_LITERAL_INT;
+			expr->literal_int = tok.literal_int;
+			expr->typeinfo = literal_typeinfo(ctx, tok.type);
+
+			break;
+
+		case SCR_TOKEN_PLUS:
+		case SCR_TOKEN_MINUS:
+		case SCR_TOKEN_STAR:
+		case SCR_TOKEN_RSLASH:
+			/* malformed source, not a programming error: report it */
+			if (i < 2) {
+				int len = dump_token(&tok, ctx->scratch, sizeof ctx->scratch);
+				dbglog(ctx, "error: binary operator needs two operands: '%.*s'\n",
+					    len, ctx->scratch);
+				return NULL;
+			}
+
+			expr = ALLOC_SIZED(ctx->arena, struct script_expr);
+			assert(expr);
+
+			expr->type = SCR_EXPR_BINARY_OP;
+			switch (next(ctx).type) {
+			case SCR_TOKEN_PLUS:	expr->binary_op.type = SCR_BINARY_OP_ADD; break;
+			case SCR_TOKEN_MINUS:	expr->binary_op.type = SCR_BINARY_OP_SUB; break;
+			case SCR_TOKEN_STAR:	expr->binary_op.type = SCR_BINARY_OP_MUL; break;
+			case SCR_TOKEN_RSLASH:	expr->binary_op.type = SCR_BINARY_OP_DIV; break;
+			default:		UNREACHABLE(); break;
+			}
+
+			/* the right-hand operand was pushed last */
+			expr->binary_op.rhs = stack[--i];
+			expr->binary_op.lhs = stack[--i];
+
+			assert(expr->binary_op.lhs->typeinfo == expr->binary_op.rhs->typeinfo);
+			expr->typeinfo = expr->binary_op.lhs->typeinfo;
+
+			break;
+
+		default:
+			goto end;
+		}
+
+		assert(i < cap);
+		stack[i++] = expr;
+	}
+
+	/* the loop only exits normally once the stack is full */
+	{
+		struct script_token tok = peek(ctx, 0);
+		int len = dump_token(&tok, ctx->scratch, sizeof ctx->scratch);
+		dbglog(ctx, "error: stack overflow while parsing expression: '%.*s'\n",
+			    len, ctx->scratch);
+		return NULL;
+	}
+
+end:
+	/* a well-formed expression reduces to exactly one value */
+	if (i != 1) {
+		dbglog(ctx, "error: expected a single expression, have %zu values\n", i);
+		return NULL;
+	}
+
+	return stack[0];
+}
+
+/* Parse `'return' expr ';'` into a newly allocated SCR_STMT_RET statement. */
+static struct script_stmt *
+parse_return(struct compile_ctx *ctx)
+{
+	struct script_stmt *node = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(node);
+
+	node->type = SCR_STMT_RET;
+
+	expect(ctx, SCR_TOKEN_RETURN);
+	struct script_expr *value = parse_expr(ctx);
+	node->ret.expr = value;
+	expect(ctx, SCR_TOKEN_SEMICOLON);
+
+	return node;
+}
+
+/* Parse `ident ':' type '=' expr ';'` into a SCR_STMT_VARDECL statement.
+ *
+ * Returns NULL when the type or the initialiser expression fails to parse;
+ * the failing sub-parser has already logged the reason.
+ */
+static struct script_stmt *
+parse_vardecl(struct compile_ctx *ctx)
+{
+	struct script_stmt *stmt = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(stmt);
+
+	stmt->type = SCR_STMT_VARDECL;
+	stmt->vardecl.ident = expect(ctx, SCR_TOKEN_IDENT).ident;
+	expect(ctx, SCR_TOKEN_COLON);
+
+	stmt->vardecl.typeinfo = parse_typeinfo(ctx);
+	if (!stmt->vardecl.typeinfo)
+		return NULL;
+
+	expect(ctx, SCR_TOKEN_EQUALS);
+
+	/* check before dereferencing: parse_expr() reports failure as NULL */
+	stmt->vardecl.expr = parse_expr(ctx);
+	if (!stmt->vardecl.expr)
+		return NULL;
+
+	assert(stmt->vardecl.typeinfo == stmt->vardecl.expr->typeinfo);
+
+	expect(ctx, SCR_TOKEN_SEMICOLON);
+
+	return stmt;
+}
+
+/* Parse one statement, selecting the statement kind from the next token.
+ *
+ * Returns NULL, after logging an error, when the token cannot start a
+ * statement; the offending token is not consumed.
+ */
+static struct script_stmt *
+parse_statement(struct compile_ctx *ctx)
+{
+	struct script_token first = peek(ctx, 0);
+
+	if (first.type == SCR_TOKEN_RETURN)
+		return parse_return(ctx);
+
+	if (first.type == SCR_TOKEN_IDENT)
+		return parse_vardecl(ctx);
+
+	int len = dump_token(&first, ctx->scratch, sizeof ctx->scratch);
+	dbglog(ctx, "error: expected a statement, got: '%.*s'\n",
+		    len, ctx->scratch);
+
+	return NULL;
+}
+
+/* Parse statements until the end of the token stream, collecting them as the
+ * children of a newly allocated SCR_STMT_BLOCK statement.
+ *
+ * Returns NULL as soon as any statement fails to parse.
+ */
+static struct script_stmt *
+parse_statement_list(struct compile_ctx *ctx)
+{
+	struct script_stmt *stmt = ALLOC_SIZED(ctx->arena, struct script_stmt);
+	assert(stmt);
+
+	stmt->type = SCR_STMT_BLOCK;
+	stmt->block.children.head = stmt->block.children.tail = NULL;
+
+	while (peek(ctx, 0).type != SCR_TOKEN_EOF) {
+		struct script_stmt *child = parse_statement(ctx);
+
+		/* a failed statement has no list node to link, and the token
+		 * that caused the failure may not have been consumed
+		 */
+		if (!child)
+			return NULL;
+
+		list_push_tail(&stmt->block.children, &child->list_node);
+	}
+
+	return stmt;
+}
+
+/* Parse the whole token stream into an AST.
+ *
+ * Returns the root block statement, or NULL when parsing failed so that the
+ * caller can report the failure.
+ */
+static struct script_stmt *
+parse(struct compile_ctx *ctx)
+{
+	struct script_stmt *ast = parse_statement_list(ctx);
+	if (!ast)
+		return NULL;
+
+	/* a successful parse must have consumed every token */
+	assert(ctx->stream->cur == ctx->stream->len);
+
+	return ast;
+}
+
+/* Log a one-line description of `typeinfo`, indented by `indent` levels.
+ *
+ * Nothing is printed for a type other than SCR_TYPE_U64.
+ */
+static void
+dump_typeinfo(struct compile_ctx *ctx, struct script_typeinfo *typeinfo, size_t indent)
+{
+	static char const *type_str[] = {
+		[SCR_TYPE_U64]	= "U64",
+	};
+
+	if (typeinfo->type != SCR_TYPE_U64)
+		return;
+
+	/* two spaces per indentation level */
+	for (size_t depth = 0; depth < indent; depth++)
+		dbglog(ctx, "  ");
+
+	dbglog(ctx, "Typeinfo { type: %s, size: %zu, alignment: %zu }\n",
+		    type_str[typeinfo->type], typeinfo->size, typeinfo->alignment);
+}
+
+/* Recursively log the expression tree rooted at `expr`, indented by `indent`
+ * levels. Each node prints its kind, then its typeinfo, then (for binary
+ * operators) its left and right operands.
+ */
+static void
+dump_expr(struct compile_ctx *ctx, struct script_expr *expr, size_t indent)
+{
+#define INDENT(depth) \
+	do { for (size_t n = 0; n < (depth); n++) dbglog(ctx, "  "); } while (0)
+
+	static char const *binary_op_str[] = {
+		[SCR_BINARY_OP_ADD] = "+",
+		[SCR_BINARY_OP_SUB] = "-",
+		[SCR_BINARY_OP_MUL] = "*",
+		[SCR_BINARY_OP_DIV] = "/",
+	};
+
+	/* header line: the kind of node and its payload */
+	switch (expr->type) {
+	case SCR_EXPR_IDENT:
+		INDENT(indent);
+		dbglog(ctx, "ident: %" PRIu64 "\n", expr->ident.v);
+		break;
+
+	case SCR_EXPR_LITERAL_INT:
+		INDENT(indent);
+		dbglog(ctx, "literal int: %" PRIu64 "\n", expr->literal_int);
+		break;
+
+	case SCR_EXPR_BINARY_OP:
+		INDENT(indent);
+		dbglog(ctx, "binary op: %s\n", binary_op_str[expr->binary_op.type]);
+		break;
+
+	default:
+		return;
+	}
+
+	/* every kind of node reports its typeinfo the same way */
+	INDENT(indent + 1);
+	dbglog(ctx, "typeinfo:\n");
+	dump_typeinfo(ctx, expr->typeinfo, indent + 2);
+
+	if (expr->type != SCR_EXPR_BINARY_OP)
+		return;
+
+	INDENT(indent + 1);
+	dbglog(ctx, "lhs:\n");
+	dump_expr(ctx, expr->binary_op.lhs, indent + 2);
+
+	INDENT(indent + 1);
+	dbglog(ctx, "rhs:\n");
+	dump_expr(ctx, expr->binary_op.rhs, indent + 2);
+
+#undef INDENT
+}
+
+/* Recursively log the statement tree rooted at `node`, indented by `indent`
+ * levels: blocks list their children, declarations show their typeinfo and
+ * initialiser, and returns show the returned expression.
+ */
+static void
+dump_parse_tree(struct compile_ctx *ctx, struct script_stmt *node, size_t indent)
+{
+#define INDENT(depth) \
+	do { for (size_t n = 0; n < (depth); n++) dbglog(ctx, "  "); } while (0)
+
+	switch (node->type) {
+	case SCR_STMT_BLOCK:
+		INDENT(indent);
+		dbglog(ctx, "block\n");
+
+		/* children are visited from the head of the list to its tail */
+		for (struct script_list_node *it = node->block.children.head;
+		     it; it = it->next) {
+			struct script_stmt *child =
+				SCRIPT_FROM_NODE(it, struct script_stmt, list_node);
+
+			dump_parse_tree(ctx, child, indent + 1);
+		}
+		break;
+
+	case SCR_STMT_VARDECL:
+		INDENT(indent);
+		dbglog(ctx, "vardecl: ident: %" PRIu64 "\n", node->vardecl.ident.v);
+
+		INDENT(indent + 1);
+		dbglog(ctx, "typeinfo:\n");
+		dump_typeinfo(ctx, node->vardecl.typeinfo, indent + 2);
+
+		INDENT(indent + 1);
+		dbglog(ctx, "expr:\n");
+		dump_expr(ctx, node->vardecl.expr, indent + 2);
+		break;
+
+	case SCR_STMT_RET:
+		INDENT(indent);
+		dbglog(ctx, "return\n");
+
+		INDENT(indent + 1);
+		dbglog(ctx, "expr:\n");
+		dump_expr(ctx, node->ret.expr, indent + 2);
+		break;
+	}
+
+#undef INDENT
+}
+
+/* Tokenise and parse `src_len` bytes of source into an AST.
+ *
+ * All tokens and AST nodes are allocated from the `mem_len` bytes at `mem`,
+ * so the tree stored in `*out` is only valid while that memory is. Errors,
+ * and trace output when `verbose` is non-zero, are written to `errstream`.
+ *
+ * Returns 0 on success and -1 on failure, in which case `*out` is not written.
+ */
+int
+script_parse(char *src, size_t src_len, void *mem, size_t mem_len,
+	     struct script_stmt **out, FILE *errstream, int verbose)
+{
+	struct arena arena = {
+		.ptr = mem,
+		.cap = mem_len,
+		.len = 0,
+	};
+
+	struct compile_ctx ctx = {
+		.errstream = errstream,
+		.verbose = verbose,
+		.arena = &arena,
+	};
+
+	if (ctx.verbose)
+		dbglog(&ctx, "info: arena cap: %zu bytes, verbose: %d\n",
+			     arena.cap, verbose);
+
+	/* stage one: source text to tokens */
+	struct token_stream stream = tokenise(&ctx, src, src_len);
+	if (!stream.ptr) {
+		dbglog(&ctx, "error: failed to tokenise source\n");
+		return -1;
+	}
+
+	if (ctx.verbose)
+		dump_token_stream(&ctx, stream.ptr, stream.len);
+
+	ctx.stream = &stream;
+
+	/* stage two: tokens to AST */
+	struct script_stmt *root = parse(&ctx);
+	if (!root) {
+		dbglog(&ctx, "error: failed to parse source\n");
+		return -1;
+	}
+
+	if (verbose)
+		dump_parse_tree(&ctx, root, 0);
+
+	*out = root;
+
+	return 0;
+}
+
+#include "utils.c"
diff --git a/libscript/libscript.h b/libscript/libscript.h
@@ -0,0 +1,172 @@
+#ifndef LIBSCRIPT_H
+#define LIBSCRIPT_H
+
+#include <assert.h>
+#include <errno.h>
+#include <stdalign.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* utilities
+ * ===========================================================================
+ */
+
+/* Convert a pointer to `member` embedded in a `T` back into a pointer to the
+ * enclosing `T`; a NULL `child_ptr` yields NULL. The argument is
+ * parenthesised so that any pointer expression can be passed safely.
+ */
+#define SCRIPT_TO_PARENT(child_ptr, T, member) \
+	((child_ptr) ? (T *) ((uintptr_t) (child_ptr) - offsetof(T, member)) : NULL)
+
+/* Identifier handle stored in tokens and AST nodes.
+ * NOTE(review): presumably an interned-string id; the tokeniser does not
+ * assign it yet (see its "get flystr" TODO) -- confirm once implemented.
+ */
+struct script_flystr {
+	uint64_t v;
+};
+
+/* Intrusive doubly-linked list link; embed it in the struct to be listed and
+ * recover the containing struct with SCRIPT_FROM_NODE().
+ */
+struct script_list_node {
+	struct script_list_node *prev, *next;
+};
+
+#define SCRIPT_FROM_NODE(node, T, member) SCRIPT_TO_PARENT(node, T, member)
+
+#define SCRIPT_NODE_ITER(node) \
+	for (struct script_list_node *it = (node); it; it = it->next)
+
+#define SCRIPT_NODE_RITER(node) \
+	for (struct script_list_node *it = (node); it; it = it->prev)
+
+/* A doubly-linked list of script_list_node; both pointers are NULL when the
+ * list is empty.
+ */
+struct script_list {
+	struct script_list_node *head, *tail;
+};
+
+#define SCRIPT_LIST_ITER(list) SCRIPT_NODE_ITER((list)->head)
+#define SCRIPT_LIST_RITER(list) SCRIPT_NODE_RITER((list)->tail)
+
+/* lexer
+ * ===========================================================================
+ */
+
+/* Kinds of token produced by the lexer.
+ *
+ * Punctuation tokens use the character code of the punctuation itself as
+ * their value, which lets the tokeniser convert such a character to its
+ * token type with a plain cast.
+ */
+enum script_token_type {
+	SCR_TOKEN_EOF,
+
+	/* literals */
+	SCR_TOKEN_IDENT,
+	SCR_TOKEN_LITERAL_INT,
+
+	/* keywords */
+	SCR_TOKEN_U64,
+	SCR_TOKEN_RETURN,
+
+	/* punctuation */
+	SCR_TOKEN_LPAREN	= '(',
+	SCR_TOKEN_RPAREN	= ')',
+	SCR_TOKEN_LBRACK	= '[',
+	SCR_TOKEN_RBRACK	= ']',
+	SCR_TOKEN_LBRACE	= '{',
+	SCR_TOKEN_RBRACE	= '}',
+
+	SCR_TOKEN_LANGLE	= '<',
+	SCR_TOKEN_RANGLE	= '>',
+	SCR_TOKEN_LSLASH	= '\\',
+	SCR_TOKEN_RSLASH	= '/',
+
+	SCR_TOKEN_COLON		= ':',
+	SCR_TOKEN_SEMICOLON	= ';',
+	SCR_TOKEN_DOT		= '.',
+	SCR_TOKEN_COMMA		= ',',
+
+	SCR_TOKEN_EQUALS	= '=',
+	SCR_TOKEN_PLUS		= '+',
+	SCR_TOKEN_MINUS		= '-',
+	SCR_TOKEN_STAR		= '*',
+};
+
+/* A single lexed token; `type` selects which union member, if any, is
+ * meaningful.
+ */
+struct script_token {
+	enum script_token_type type;
+
+	union {
+		struct script_flystr ident;	/* SCR_TOKEN_IDENT */
+		uint64_t literal_int;		/* SCR_TOKEN_LITERAL_INT */
+	};
+};
+
+/* parser
+ * ===========================================================================
+ */
+
+enum script_type {
+	SCR_TYPE_U64,
+};
+
+/* Description of a type: which type it is plus its size and alignment.
+ * NOTE(review): size and alignment are presumably in bytes (the u64 entry
+ * uses 8 for both) -- confirm.
+ */
+struct script_typeinfo {
+	enum script_type type;
+
+	size_t size;
+	size_t alignment;
+};
+
+enum script_expr_type {
+	SCR_EXPR_IDENT,
+	SCR_EXPR_LITERAL_INT,
+	SCR_EXPR_BINARY_OP,
+};
+
+/* An expression tree node; `type` selects the active union member. */
+struct script_expr {
+	enum script_expr_type type;
+
+	/* type of the value this expression evaluates to */
+	struct script_typeinfo *typeinfo;
+
+	union {
+		/* SCR_EXPR_IDENT */
+		struct script_flystr ident;
+
+		/* SCR_EXPR_LITERAL_INT */
+		uint64_t literal_int;
+
+		/* SCR_EXPR_BINARY_OP: operator and its two operand subtrees */
+		struct {
+			enum {
+				SCR_BINARY_OP_ADD,
+				SCR_BINARY_OP_SUB,
+				SCR_BINARY_OP_MUL,
+				SCR_BINARY_OP_DIV,
+			} type;
+
+			struct script_expr *lhs, *rhs;
+		} binary_op;
+	};
+};
+
+enum script_stmt_type {
+	SCR_STMT_BLOCK,
+	SCR_STMT_VARDECL,
+	SCR_STMT_RET,
+};
+
+/* A statement tree node; `type` selects the active union member. */
+struct script_stmt {
+	enum script_stmt_type type;
+
+	union {
+		/* SCR_STMT_BLOCK: child statements, linked via their list_node */
+		struct {
+			struct script_list children;
+		} block;
+
+		/* SCR_STMT_VARDECL: declared name, declared type and initialiser */
+		struct {
+			struct script_flystr ident;
+			struct script_typeinfo *typeinfo;
+			struct script_expr *expr;
+		} vardecl;
+
+		/* SCR_STMT_RET: the returned expression */
+		struct {
+			struct script_expr *expr;
+		} ret;
+	};
+
+	/* link used when this statement is a child of a block */
+	struct script_list_node list_node;
+};
+
+/* libscript
+ * ===========================================================================
+ */
+
+extern int
+script_parse(char *src, size_t src_len, void *mem, size_t mem_len,
+	     struct script_stmt **out, FILE *errstream, int verbose);
+
+#endif /* LIBSCRIPT_H */
diff --git a/libscript/libscript_internal.h b/libscript/libscript_internal.h
@@ -0,0 +1,95 @@
+#ifndef LIBSCRIPT_INTERNAL_H
+#define LIBSCRIPT_INTERNAL_H
+
+#include "libscript.h"
+
+#include <ctype.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#define UNREACHABLE() (*((volatile char *) 0) = 0)
+#define PANIC() UNREACHABLE()
+
+#define ALIGN_PREV(v, align) ((v) & ~((align) - 1))
+#define ALIGN_NEXT(v, align) ALIGN_PREV((v) + ((align) - 1), (align))
+
+/* A bump allocator over a caller-provided buffer. */
+struct arena {
+	void *ptr;		/* start of the buffer */
+	size_t cap, len;	/* buffer size in bytes; bytes handed out so far */
+};
+
+/* Discard every allocation by rewinding the arena to empty; the buffer
+ * contents are left untouched.
+ */
+inline void
+arena_reset(struct arena *arena)
+{
+	arena->len = 0;
+}
+
+/* Allocate `size` bytes from `arena` at an offset that is a multiple of
+ * `align`, which must be a power of two.
+ *
+ * Alignment is applied to the offset within the buffer, so the buffer itself
+ * has to be suitably aligned. Returns NULL when the request does not fit.
+ */
+inline void *
+arena_alloc(struct arena *arena, size_t size, size_t align)
+{
+	size_t aligned_len = ALIGN_NEXT(arena->len, align);
+
+	/* phrased so that neither the rounding nor the addition can wrap
+	 * around and slip past the capacity check
+	 */
+	if (aligned_len < arena->len || aligned_len > arena->cap ||
+	    size > arena->cap - aligned_len)
+		return NULL;
+
+	void *ptr = (void *) ((uintptr_t) arena->ptr + aligned_len);
+	arena->len = aligned_len + size;
+
+	return ptr;
+}
+
+#define ALLOC_ARRAY(arena, T, n) \
+	arena_alloc((arena), sizeof(T) * (n), alignof(T))
+
+#define ALLOC_SIZED(arena, T) ALLOC_ARRAY((arena), T, 1)
+
+/* Insert `node` at the front of `list`.
+ *
+ * Both links of `node` are written, so the node does not need to be
+ * initialised beforehand.
+ */
+inline void
+list_push_head(struct script_list *restrict list,
+	       struct script_list_node *restrict node)
+{
+	if (!list->tail)
+		list->tail = node;
+
+	if (list->head)
+		list->head->prev = node;
+
+	/* the new head has no predecessor */
+	node->prev = NULL;
+	node->next = list->head;
+	list->head = node;
+}
+
+/* Append `node` to the back of `list`.
+ *
+ * Both links of `node` are written, so the node does not need to be
+ * initialised beforehand.
+ */
+inline void
+list_push_tail(struct script_list *restrict list,
+	       struct script_list_node *restrict node)
+{
+	if (!list->head)
+		list->head = node;
+
+	if (list->tail)
+		list->tail->next = node;
+
+	/* the new tail has no successor; forward iteration stops on this NULL */
+	node->next = NULL;
+	node->prev = list->tail;
+	list->tail = node;
+}
+
+/* Remove and return the first node of `list`, or NULL if the list is empty.
+ *
+ * The returned node is fully unlinked.
+ */
+inline struct script_list_node *
+list_pop_head(struct script_list *list)
+{
+	if (!list->head)
+		return NULL;
+
+	struct script_list_node *node = list->head;
+	list->head = node->next;
+
+	if (list->head)
+		list->head->prev = NULL;	/* new head has no predecessor */
+	else
+		list->tail = NULL;		/* the list is now empty */
+
+	node->prev = node->next = NULL;
+	return node;
+}
+
+/* Remove and return the last node of `list`, or NULL if the list is empty.
+ *
+ * The returned node is fully unlinked.
+ */
+inline struct script_list_node *
+list_pop_tail(struct script_list *list)
+{
+	if (!list->tail)
+		return NULL;
+
+	struct script_list_node *node = list->tail;
+	list->tail = node->prev;
+
+	if (list->tail)
+		list->tail->next = NULL;	/* new tail has no successor */
+	else
+		list->head = NULL;		/* the list is now empty */
+
+	node->prev = node->next = NULL;
+	return node;
+}
+
+#endif /* LIBSCRIPT_INTERNAL_H */
diff --git a/libscript/utils.c b/libscript/utils.c
@@ -0,0 +1,21 @@
+#include "libscript_internal.h"
+
+/* The helpers in libscript_internal.h are declared `inline` without `static`.
+ * In C11 such a definition does not emit an external symbol on its own; the
+ * `extern inline` declarations below make this translation unit provide the
+ * external definition of each helper.
+ */
+
+extern inline void
+arena_reset(struct arena *arena);
+
+extern inline void *
+arena_alloc(struct arena *arena, size_t size, size_t align);
+
+extern inline void
+list_push_head(struct script_list *restrict list,
+	       struct script_list_node *restrict node);
+
+extern inline void
+list_push_tail(struct script_list *restrict list,
+	       struct script_list_node *restrict node);
+
+extern inline struct script_list_node *
+list_pop_head(struct script_list *list);
+
+extern inline struct script_list_node *
+list_pop_tail(struct script_list *list);
diff --git a/scriptcc/scriptcc.c b/scriptcc/scriptcc.c
@@ -0,0 +1,7 @@
+#include "libscript.h"
+
+/* Entry point of the scriptcc driver; currently a stub that does nothing. */
+int
+main(int argc, char **argv)
+{
+	/* not used yet; referenced so the -Wextra build stays warning-free */
+	(void) argc;
+	(void) argv;
+
+	return 0;
+}
diff --git a/scriptvm/scriptvm.c b/scriptvm/scriptvm.c
@@ -0,0 +1,142 @@
+#include "libscript.h"
+
+#include <stdio.h>
+#include <getopt.h>
+
+#define MiB (1024 * 1024)
+
+/* Command line options, filled in by parse_opts(). */
+struct {
+	int verbose;		/* -v: enable verbose logging */
+	FILE *logfile;		/* -f: where compilation errors are logged */
+	uint64_t mem;		/* -m: memory available for compilation, in bytes */
+
+	/* the non-option arguments: paths of the source files to interpret */
+	struct {
+		char **ptr;
+		size_t len;
+	} sources;
+} opts = {
+	.verbose = 0,
+	.logfile = NULL,
+	.mem = 8192 * 1024,	/* 8 MiB */
+
+	.sources.ptr = NULL,
+	.sources.len = 0,
+};
+
+#define OPTSTR "hvf:m:"
+
+/* Print the command line synopsis and option summary to stderr. */
+static void
+usage(char *prog)
+{
+	fprintf(stderr, "Usage: %s [-hv] [-f <logfile>] [-m <mem-cap-mib>] sources...\n", prog);
+	fprintf(stderr, "\t-h : display usage information\n");
+	fprintf(stderr, "\t-v : enable verbose logging\n");
+	fprintf(stderr, "\t-f : file to log compilation errors to (default: stderr)\n");
+	fprintf(stderr, "\t-m : maximum memory for compilation, in MiB (default: 8 MiB)\n");
+	fprintf(stderr, "\tsources... : the source files to interpret\n");
+}
+
+/* Parse the command line into the global `opts`.
+ *
+ * Returns 0 on success. Returns -1 when an option is unknown or malformed,
+ * when -h is given, or when no source files are listed; the caller then
+ * prints the usage text.
+ */
+static int
+parse_opts(int argc, char **argv)
+{
+	int opt;
+
+	/* getopt() signals the end of the options with -1 */
+	while ((opt = getopt(argc, argv, OPTSTR)) != -1) {
+		switch (opt) {
+		case 'v':
+			opts.verbose = 1;
+			break;
+
+		case 'f':
+			/* a repeated -f replaces the earlier log file */
+			if (opts.logfile)
+				fclose(opts.logfile);
+
+			if (!(opts.logfile = fopen(optarg, "w+"))) {
+				fprintf(stderr, "Failed to open logfile: %s\n", optarg);
+				return -1;
+			}
+			break;
+
+		case 'm': {
+			char *end = optarg;
+
+			errno = 0;
+			unsigned long long mib = strtoull(optarg, &end, 0);
+
+			/* reject empty, partially numeric, zero and overflowing values */
+			if (errno || end == optarg || *end != '\0' ||
+			    !mib || mib > SIZE_MAX / MiB) {
+				fprintf(stderr, "Failed to parse memory limit: %s\n", optarg);
+				return -1;
+			}
+
+			opts.mem = mib * MiB;
+		} break;
+
+		case 'h':
+		default:
+			return -1;
+		}
+	}
+
+	if (!opts.logfile)
+		opts.logfile = stderr;
+
+	opts.sources.ptr = argv + optind;
+	opts.sources.len = argc - optind;
+
+	if (!opts.sources.len) {
+		fprintf(stderr, "Failed to provide source files\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/* Evaluate a parsed program. Currently a stub: the AST is ignored. */
+static void
+interpret(struct script_stmt *ast)
+{
+	(void) ast;
+
+	// TODO: interpret it
+}
+
+/* Entry point of the scriptvm driver: parse and interpret each source file
+ * named on the command line. A file that cannot be read or parsed is
+ * reported and skipped.
+ */
+int
+main(int argc, char **argv)
+{
+	if (parse_opts(argc, argv)) {
+		usage(argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	void *mem = malloc(opts.mem);
+	assert(mem);
+
+	size_t mem_len = opts.mem;
+
+	// TODO: mmap files in? read them into a single buffer?
+	for (size_t i = 0; i < opts.sources.len; i++) {
+		char *source = opts.sources.ptr[i];
+		fprintf(stderr, "Interpreting source file: %s\n", source);
+
+		FILE *fp = fopen(source, "r");
+		if (!fp) {
+			fprintf(stderr, "Failed to open source file: %s\n", source);
+			continue;
+		}
+
+		/* ftell() reports failure as -1, which must not become a size */
+		long file_len = -1;
+		if (fseek(fp, 0, SEEK_END) == 0)
+			file_len = ftell(fp);
+
+		if (file_len < 0) {
+			fprintf(stderr, "Failed to determine size of source file: %s\n", source);
+			fclose(fp);
+			continue;
+		}
+
+		rewind(fp);
+
+		size_t src_len = (size_t) file_len;
+
+		/* one spare byte keeps the buffer NUL-terminated and avoids a
+		 * zero-sized allocation for an empty file
+		 */
+		char *src = malloc(src_len + 1);
+		assert(src);
+
+		size_t nbytes = fread(src, 1, src_len, fp);
+		fclose(fp);
+
+		if (nbytes != src_len) {
+			fprintf(stderr, "Failed to read source file: %s\n", source);
+			free(src);
+			continue;
+		}
+
+		src[src_len] = '\0';
+
+		fprintf(stderr, "\tRead %zu bytes of file, parsing...\n", src_len);
+
+		/* hand every source a zeroed arena, exactly like the first one */
+		memset(mem, 0, mem_len);
+
+		struct script_stmt *ast;
+		if (script_parse(src, src_len, mem, mem_len, &ast,
+				 opts.logfile, opts.verbose) < 0) {
+			fprintf(stderr, "Failed to parse source file: %s\n", source);
+			free(src);
+			continue;
+		}
+
+		// emit(ast); // TODO: dump out bytecode to a file?
+		interpret(ast);
+
+		free(src);
+	}
+
+	free(mem);
+
+	exit(EXIT_SUCCESS);
+}

	script script.git
	git clone git://git.lenczewski.org/script.git
	Log \| Files \| Refs

A	.editorconfig	\|	17	+++++++++++++++++
A	.gitignore	\|	6	++++++
A	README.txt	\|	2	++
A	build.sh	\|	26	++++++++++++++++++++++++++
A	clean.sh	\|	5	+++++
A	debug.sh	\|	16	++++++++++++++++
A	docs/script.grammar	\|	140	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	docs/simple.grammar	\|	64	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	examples/test.script	\|	2	++
A	libscript/libscript.c	\|	649	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	libscript/libscript.h	\|	172	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	libscript/libscript_internal.h	\|	95	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	libscript/utils.c	\|	21	+++++++++++++++++++++
A	scriptcc/scriptcc.c	\|	7	+++++++
A	scriptvm/scriptvm.c	\|	142	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++