script

script.git
git clone git://git.lenczewski.org/script.git
Log | Files | Refs

scriptvm.c (11601B)


      1 #define _XOPEN_SOURCE 500
      2 
      3 #include "libscript.h"
      4 
      5 #include <inttypes.h>
      6 #include <stdio.h>
      7 #include <stdlib.h>
      8 #include <unistd.h>
      9 
     10 #include <getopt.h>
     11 
     12 #define KiB (1024ULL)
     13 #define MiB (1024 * KiB)
     14 
     15 #define IS_ALIGNED(v, align) (((v) & ((align) - 1)) == 0)
     16 
     17 struct {
     18 	int verbose;
     19 	FILE *logfile;
     20 	uint64_t compiler_mem, vm_stack_mem, vm_heap_mem;
     21 	int vm_emit_bytecode;
     22 
     23 	struct {
     24 		char **ptr;
     25 		size_t len;
     26 	} sources;
     27 } opts = {
     28 	.verbose = 0,
     29 	.logfile = NULL,
     30 	.compiler_mem = 8 * MiB,
     31 	.vm_stack_mem = 4 * KiB,
     32 	.vm_heap_mem = 4 * KiB,
     33 
     34 	.sources.ptr = NULL,
     35 	.sources.len = 0,
     36 };
     37 
     38 #define OPTSTR "hvf:m:ES:M:"
     39 
     40 static void
     41 usage(char *prog)
     42 {
     43 	fprintf(stderr, "Usage: %s [-hv] [-m <mem-cap-mib>] "
     44 			"[-E] [-S <stack-kib>] [-M <head-kib>] sources...\n", prog);
     45 	fprintf(stderr, "\t-h : display usage information\n");
     46 	fprintf(stderr, "\t-v : enable verbose logging\n");
     47 	fprintf(stderr, "\t-f : file to log compilation errors to (default: stderr)\n");
     48 	fprintf(stderr, "\t-m : maximum memory for compilation, in MiB (default: 8 MiB)\n");
     49 	fprintf(stderr, "\t-E : emit human-readable ir for interpreted instructions\n");
     50 	fprintf(stderr, "\t-S : maximum stack memory for vm, in KiB (default: 4096 KiB)\n");
     51 	fprintf(stderr, "\t-M : maximum heap memory for vm, in KiB (default: 4096 KiB)\n");
     52 	fprintf(stderr, "\tsources... : the source files to interpret\n");
     53 }
     54 
     55 static int
     56 parse_opts(int argc, char **argv)
     57 {
     58 	int opt;
     59 	while ((opt = getopt(argc, argv, OPTSTR)) > 0) {
     60 		switch (opt) {
     61 		case 'v':
     62 			opts.verbose = 1;
     63 			break;
     64 
     65 		case 'f':
     66 			if (!(opts.logfile = fopen(optarg, "w+"))) {
     67 				fprintf(stderr, "Failed to open logfile: %s\n", optarg);
     68 				return -1;
     69 			}
     70 			break;
     71 
     72 		case 'm':
     73 			if (!(opts.compiler_mem = strtoull(optarg, NULL, 0) * MiB)) {
     74 				fprintf(stderr, "Failed to parse memory limit: %s\n", optarg);
     75 				return -1;
     76 			}
     77 			break;
     78 
     79 		case 'E':
     80 			opts.vm_emit_bytecode = 1;
     81 			break;
     82 
     83 		case 'S':
     84 			if (!(opts.vm_stack_mem = strtoull(optarg, NULL, 0) * KiB)) {
     85 				fprintf(stderr, "Failed to parse stack memory limit: %s\n", optarg);
     86 				return -1;
     87 			}
     88 			break;
     89 
     90 		case 'M':
     91 			if (!(opts.vm_heap_mem = strtoull(optarg, NULL, 0) * KiB)) {
     92 				fprintf(stderr, "Failed to parse heap memory limit: %s\n", optarg);
     93 				return -1;
     94 			}
     95 			break;
     96 
     97 		default:
     98 			return -1;
     99 		}
    100 	}
    101 
    102 	if (!opts.logfile)
    103 		opts.logfile = stderr;
    104 
    105 	opts.sources.ptr = argv + optind;
    106 	opts.sources.len = argc - optind;
    107 
    108 	if (!opts.sources.len) {
    109 		fprintf(stderr, "Failed to provide source files\n");
    110 		return -1;
    111 	}
    112 
    113 	return 0;
    114 }
    115 
    116 static void
    117 emit(int fd, struct script_program const *prog)
    118 {
    119 	for (size_t i = 0; i < prog->instructions.len; i++) {
    120 		struct script_ir_inst *inst = prog->instructions.ptr + i;
    121 
    122 		dprintf(fd, "[%03zu] %5s ", i, script_ir_opcode_str(inst->opcode));
    123 
    124 		if (inst->typeinfo)
    125 			dprintf(fd, "<%s> ", script_ir_type_str(inst->typeinfo->type));
    126 
    127 		for (size_t j = 0; j < inst->operand_count; j++) {
    128 			struct script_ir_operand *operand = &inst->operands[j];
    129 
    130 			switch (operand->type) {
    131 			case SCR_IR_OPERAND_LITERAL:
    132 				dprintf(fd, "LITERAL{0x%" PRIx64 "}", operand->literal);
    133 				break;
    134 
    135 			case SCR_IR_OPERAND_ADDRESS:
    136 				dprintf(fd, "ADDRESS{0x%" PRIx64 "}", operand->address);
    137 				break;
    138 
    139 			case SCR_IR_OPERAND_OFFSET:
    140 				dprintf(fd, "OFFSET{%" PRIi64 "}", operand->offset);
    141 				break;
    142 			}
    143 
    144 			dprintf(fd, ",");
    145 		}
    146 
    147 		dprintf(fd, "\n");
    148 	}
    149 }
    150 
    151 typedef uint64_t reg_t;
    152 
    153 struct vm_state {
    154 	reg_t pc, sp, acc;
    155 
    156 	reg_t r0;
    157 
    158 	struct {
    159 		unsigned char *ptr;
    160 		size_t len;
    161 	} stack, heap;
    162 };
    163 
    164 static void
    165 dump_vm_state(struct vm_state *vm)
    166 {
    167 	fprintf(stderr, "vm state:\n");
    168 	fprintf(stderr, "\tpc: 0x%" PRIx64 ", sp: 0x%" PRIx64 ", acc: 0x%" PRIx64 "\n",
    169 			vm->pc, vm->sp, vm->acc);
    170 	fprintf(stderr, "\tr0: 0x%" PRIx64 "\n", vm->r0);
    171 }
    172 
    173 static inline uint64_t
    174 pop(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo)
    175 {
    176 	uint64_t value = 0;
    177 
    178 	assert(typeinfo->size <= vm->sp); /* stack underrun */
    179 	vm->sp -= typeinfo->size;
    180 
    181 	unsigned char *storage = vm->stack.ptr + vm->sp;
    182 	assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
    183 	memcpy(&value, storage, typeinfo->size);
    184 
    185 	vm->acc = value;
    186 
    187 	return value;
    188 }
    189 
    190 static inline void
    191 push(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uint64_t value)
    192 {
    193 	assert(vm->sp + typeinfo->size < vm->stack.len);
    194 
    195 	unsigned char *storage = vm->stack.ptr + vm->sp;
    196 	assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
    197 	memcpy(storage, &value, typeinfo->size);
    198 
    199 	// TODO: should push update the accumulator?
    200 	vm->acc = value;
    201 
    202 	vm->sp += typeinfo->size;
    203 }
    204 
    205 static inline void
    206 load(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr)
    207 {
    208 	uint64_t value = 0;
    209 
    210 	assert(addr + typeinfo->size < vm->heap.len);
    211 	unsigned char *storage = vm->heap.ptr + addr;
    212 	assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
    213 	memcpy(&value, storage, typeinfo->size);
    214 
    215 	vm->acc = value;
    216 
    217 	push(vm, typeinfo, value);
    218 }
    219 
    220 static inline void
    221 store(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr, uint64_t value)
    222 {
    223 	assert(addr + typeinfo->size < vm->heap.len);
    224 	unsigned char *storage = vm->heap.ptr + addr;
    225 	assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment));
    226 	memcpy(storage, &value, typeinfo->size);
    227 
    228 	// TODO: should push update the accumulator?
    229 	vm->acc = value;
    230 }
    231 
    232 static inline uint64_t
    233 arithmetic(enum script_ir_opcode opcode, struct script_ir_typeinfo const *typeinfo,
    234 	   uint64_t lhs, uint64_t rhs)
    235 {
    236 #define map(lhs, rhs, op, Tin, Tout) \
    237 	((Tout) (((Tin) (lhs)) op ((Tin) (rhs))))
    238 
    239 #define vmop(op) \
    240 	switch (typeinfo->type) { \
    241 	case SCR_IR_TYPE_U8:	res = map(lhs, rhs, op, uint8_t,   uint64_t); \
    242 	case SCR_IR_TYPE_U16:	res = map(lhs, rhs, op, uint16_t,  uint64_t); \
    243 	case SCR_IR_TYPE_U32:	res = map(lhs, rhs, op, uint32_t,  uint64_t); \
    244 	case SCR_IR_TYPE_U64:	res = map(lhs, rhs, op, uint64_t,  uint64_t); \
    245 	case SCR_IR_TYPE_S8:	res = map(lhs, rhs, op, int8_t,    uint64_t); \
    246 	case SCR_IR_TYPE_S16:	res = map(lhs, rhs, op, int16_t,   uint64_t); \
    247 	case SCR_IR_TYPE_S32:	res = map(lhs, rhs, op, int32_t,   uint64_t); \
    248 	case SCR_IR_TYPE_S64:	res = map(lhs, rhs, op, int64_t,   uint64_t); \
    249 	case SCR_IR_TYPE_F32:	res = map(lhs, rhs, op, float,     uint64_t); \
    250 	case SCR_IR_TYPE_F64:	res = map(lhs, rhs, op, double,    uint64_t); \
    251 	case SCR_IR_TYPE_C8:	res = map(lhs, rhs, op, uint8_t,   uint64_t); \
    252 	case SCR_IR_TYPE_PTR:	res = map(lhs, rhs, op, uintptr_t, uint64_t); \
    253 	}
    254 
    255 	uint64_t res;
    256 	switch (opcode) {
    257 	case SCR_IR_ADD: {
    258 		vmop(+)
    259 	} break;
    260 	case SCR_IR_SUB: {
    261 		vmop(-)
    262 	} break;
    263 	case SCR_IR_MUL: {
    264 		vmop(*)
    265 	} break;
    266 	case SCR_IR_DIV: {
    267 		vmop(/)
    268 	} break;
    269 
    270 	default: assert(0);
    271 	}
    272 
    273 	if (opts.verbose)
    274 		fprintf(stderr, "arith: %d, lhs: 0x%" PRIx64 ", rhs: 0x%" PRIx64 ", "
    275 				"res: 0x%" PRIx64 "\n", opcode, lhs, rhs, res);
    276 
    277 	return res;
    278 }
    279 
    280 static uint64_t
    281 interpret(struct vm_state *vm, struct script_program *program)
    282 {
    283 	while (vm->pc < program->instructions.len) {
    284 		struct script_ir_inst *inst = &program->instructions.ptr[vm->pc];
    285 
    286 		if (opts.verbose)
    287 			fprintf(stderr, "inst: %s\n", script_ir_opcode_str(inst->opcode));
    288 
    289 		switch (inst->opcode) {
    290 		case SCR_IR_LOAD: {
    291 			assert(inst->operand_count);
    292 			assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS);
    293 			load(vm, inst->typeinfo, inst->operands[0].address);
    294 		} break;
    295 
    296 		case SCR_IR_STORE: {
    297 			assert(inst->operand_count);
    298 			assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS);
    299 			uint64_t value = pop(vm, inst->typeinfo);
    300 			store(vm, inst->typeinfo, inst->operands[0].address, value);
    301 		} break;
    302 
    303 		case SCR_IR_PUSH: {
    304 			assert(inst->operand_count);
    305 			assert(inst->operands[0].type == SCR_IR_OPERAND_LITERAL);
    306 			push(vm, inst->typeinfo, inst->operands[0].literal);
    307 		} break;
    308 
    309 		case SCR_IR_POP: {
    310 			assert(inst->operand_count == 0);
    311 			(void) pop(vm, inst->typeinfo);
    312 		} break;
    313 
    314 		case SCR_IR_RET: {
    315 			assert(inst->operand_count == 0);
    316 			vm->r0 = pop(vm, inst->typeinfo);
    317 		} break;
    318 
    319 		case SCR_IR_CMP: {
    320 			assert(inst->operand_count == 0);
    321 			uint64_t lhs = pop(vm, inst->typeinfo);
    322 			uint64_t rhs = pop(vm, inst->typeinfo);
    323 			vm->acc = lhs - rhs;
    324 		} break;
    325 
    326 		case SCR_IR_JMP: {
    327 			assert(inst->operand_count == 1);
    328 			assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
    329 			vm->pc += inst->operands[0].literal;
    330 			goto next_vm_iter;
    331 		} break;
    332 
    333 		case SCR_IR_JNE: {
    334 			assert(inst->operand_count == 1);
    335 			assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
    336 			if (vm->acc != 0) {
    337 				vm->pc += inst->operands[0].literal;
    338 				goto next_vm_iter;
    339 			}
    340 		} break;
    341 
    342 		case SCR_IR_JEQ: {
    343 			assert(inst->operand_count == 1);
    344 			assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET);
    345 			if (vm->acc == 0) {
    346 				vm->pc += inst->operands[0].literal;
    347 				goto next_vm_iter;
    348 			}
    349 		} break;
    350 
    351 		case SCR_IR_ADD:
    352 		case SCR_IR_SUB:
    353 		case SCR_IR_MUL:
    354 		case SCR_IR_DIV: {
    355 			assert(inst->operand_count == 0);
    356 			uint64_t rhs = pop(vm, inst->typeinfo);
    357 			uint64_t lhs = pop(vm, inst->typeinfo);
    358 			uint64_t res = arithmetic(inst->opcode, inst->typeinfo, lhs, rhs);
    359 			vm->acc = res;
    360 			push(vm, inst->typeinfo, res);
    361 		} break;
    362 		}
    363 
    364 		vm->pc++;
    365 
    366 next_vm_iter:
    367 		if (opts.verbose)
    368 			dump_vm_state(vm);
    369 	}
    370 
    371 	return vm->r0;
    372 }
    373 
    374 int
    375 main(int argc, char **argv)
    376 {
    377 	if (parse_opts(argc, argv)) {
    378 		usage(argv[0]);
    379 		exit(EXIT_FAILURE);
    380 	}
    381 
    382 	void *compiler_mem = malloc(opts.compiler_mem);
    383 	assert(compiler_mem);
    384 	memset(compiler_mem, 0, opts.compiler_mem);
    385 
    386 	unsigned char *vm_stack_mem = malloc(opts.vm_stack_mem);
    387 	assert(vm_stack_mem);
    388 
    389 	unsigned char *vm_heap_mem = malloc(opts.vm_heap_mem);
    390 	assert(vm_heap_mem);
    391 
    392 	for (size_t i = 0; i < opts.sources.len; i++) {
    393 		char *source = opts.sources.ptr[i];
    394 		FILE *fp = fopen(source, "r");
    395 		if (!fp) {
    396 			fprintf(stderr, "Failed to open source file: %s\n", source);
    397 			continue;
    398 		}
    399 
    400 		fseek(fp, 0, SEEK_END);
    401 		size_t src_len = ftell(fp);
    402 		rewind(fp);
    403 
    404 		char *src = malloc(src_len);
    405 		assert(src);
    406 
    407 		size_t nbytes = fread(src, 1, src_len, fp);
    408 		assert(nbytes == src_len);
    409 
    410 		fclose(fp);
    411 
    412 		fprintf(stderr, "[%s] Read %zu bytes of source file\n", source, src_len);
    413 
    414 		struct script_program program;
    415 		if (script_compile(src, src_len, compiler_mem, opts.compiler_mem,
    416 				   &program, opts.logfile, opts.verbose) < 0) {
    417 			fprintf(stderr, "Failed to compile source file: %s\n", source);
    418 			goto next_source;
    419 		}
    420 
    421 		fprintf(stderr, "[%s] Compiled program: %zu instructions, uses %zu bytes of heap\n",
    422 				source, program.instructions.len, program.max_heap_bytes);
    423 
    424 		if (opts.vm_heap_mem < program.max_heap_bytes) {
    425 			fprintf(stderr, "\tNot enough heap memory to run this program!\n");
    426 			goto next_source;
    427 		}
    428 
    429 		if (opts.vm_emit_bytecode) {
    430 			char tmpfile_path[128] = "/tmp/bytecode-XXXXXX";
    431 
    432 			int fd = mkstemp(tmpfile_path);
    433 			assert(fd > 0);
    434 
    435 			fprintf(stderr, "[%s] Emitting bytecode to file: %s\n",
    436 					source, tmpfile_path);
    437 
    438 			emit(fd, &program);
    439 
    440 			close(fd);
    441 		}
    442 
    443 		fprintf(stderr, "[%s] Interpreting with %zu bytes of stack, and %zu bytes of heap\n",
    444 				source, opts.vm_stack_mem, opts.vm_heap_mem);
    445 
    446 		struct vm_state vm;
    447 		memset(&vm, 0, sizeof vm);
    448 
    449 		memset(vm_stack_mem, 0, opts.vm_stack_mem);
    450 		vm.stack.ptr = vm_stack_mem;
    451 		vm.stack.len = opts.vm_stack_mem;
    452 
    453 		memset(vm_heap_mem, 0, opts.vm_heap_mem);
    454 		vm.heap.ptr = vm_heap_mem;
    455 		vm.heap.len = opts.vm_heap_mem;
    456 
    457 		if (opts.verbose)
    458 			dump_vm_state(&vm);
    459 
    460 		uint64_t res = interpret(&vm, &program);
    461 
    462 		fprintf(stderr, "[%s] Finished execution with value: 0x%" PRIx64 "\n",
    463 				source, res);
    464 
    465 		dump_vm_state(&vm);
    466 
    467 next_source:
    468 		free(src);
    469 	}
    470 
    471 	exit(EXIT_SUCCESS);
    472 }