scriptvm.c (11601B)
1 #define _XOPEN_SOURCE 500 2 3 #include "libscript.h" 4 5 #include <inttypes.h> 6 #include <stdio.h> 7 #include <stdlib.h> 8 #include <unistd.h> 9 10 #include <getopt.h> 11 12 #define KiB (1024ULL) 13 #define MiB (1024 * KiB) 14 15 #define IS_ALIGNED(v, align) (((v) & ((align) - 1)) == 0) 16 17 struct { 18 int verbose; 19 FILE *logfile; 20 uint64_t compiler_mem, vm_stack_mem, vm_heap_mem; 21 int vm_emit_bytecode; 22 23 struct { 24 char **ptr; 25 size_t len; 26 } sources; 27 } opts = { 28 .verbose = 0, 29 .logfile = NULL, 30 .compiler_mem = 8 * MiB, 31 .vm_stack_mem = 4 * KiB, 32 .vm_heap_mem = 4 * KiB, 33 34 .sources.ptr = NULL, 35 .sources.len = 0, 36 }; 37 38 #define OPTSTR "hvf:m:ES:M:" 39 40 static void 41 usage(char *prog) 42 { 43 fprintf(stderr, "Usage: %s [-hv] [-m <mem-cap-mib>] " 44 "[-E] [-S <stack-kib>] [-M <head-kib>] sources...\n", prog); 45 fprintf(stderr, "\t-h : display usage information\n"); 46 fprintf(stderr, "\t-v : enable verbose logging\n"); 47 fprintf(stderr, "\t-f : file to log compilation errors to (default: stderr)\n"); 48 fprintf(stderr, "\t-m : maximum memory for compilation, in MiB (default: 8 MiB)\n"); 49 fprintf(stderr, "\t-E : emit human-readable ir for interpreted instructions\n"); 50 fprintf(stderr, "\t-S : maximum stack memory for vm, in KiB (default: 4096 KiB)\n"); 51 fprintf(stderr, "\t-M : maximum heap memory for vm, in KiB (default: 4096 KiB)\n"); 52 fprintf(stderr, "\tsources... : the source files to interpret\n"); 53 } 54 55 static int 56 parse_opts(int argc, char **argv) 57 { 58 int opt; 59 while ((opt = getopt(argc, argv, OPTSTR)) > 0) { 60 switch (opt) { 61 case 'v': 62 opts.verbose = 1; 63 break; 64 65 case 'f': 66 if (!(opts.logfile = fopen(optarg, "w+"))) { 67 fprintf(stderr, "Failed to open logfile: %s\n", optarg); 68 return -1; 69 } 70 break; 71 72 case 'm': 73 if (!(opts.compiler_mem = strtoull(optarg, NULL, 0) * MiB)) { 74 fprintf(stderr, "Failed to parse memory limit: %s\n", optarg); 75 return -1; 76 } 77 break; 78 79 case 'E': 80 opts.vm_emit_bytecode = 1; 81 break; 82 83 case 'S': 84 if (!(opts.vm_stack_mem = strtoull(optarg, NULL, 0) * KiB)) { 85 fprintf(stderr, "Failed to parse stack memory limit: %s\n", optarg); 86 return -1; 87 } 88 break; 89 90 case 'M': 91 if (!(opts.vm_heap_mem = strtoull(optarg, NULL, 0) * KiB)) { 92 fprintf(stderr, "Failed to parse heap memory limit: %s\n", optarg); 93 return -1; 94 } 95 break; 96 97 default: 98 return -1; 99 } 100 } 101 102 if (!opts.logfile) 103 opts.logfile = stderr; 104 105 opts.sources.ptr = argv + optind; 106 opts.sources.len = argc - optind; 107 108 if (!opts.sources.len) { 109 fprintf(stderr, "Failed to provide source files\n"); 110 return -1; 111 } 112 113 return 0; 114 } 115 116 static void 117 emit(int fd, struct script_program const *prog) 118 { 119 for (size_t i = 0; i < prog->instructions.len; i++) { 120 struct script_ir_inst *inst = prog->instructions.ptr + i; 121 122 dprintf(fd, "[%03zu] %5s ", i, script_ir_opcode_str(inst->opcode)); 123 124 if (inst->typeinfo) 125 dprintf(fd, "<%s> ", script_ir_type_str(inst->typeinfo->type)); 126 127 for (size_t j = 0; j < inst->operand_count; j++) { 128 struct script_ir_operand *operand = &inst->operands[j]; 129 130 switch (operand->type) { 131 case SCR_IR_OPERAND_LITERAL: 132 dprintf(fd, "LITERAL{0x%" PRIx64 "}", operand->literal); 133 break; 134 135 case SCR_IR_OPERAND_ADDRESS: 136 dprintf(fd, "ADDRESS{0x%" PRIx64 "}", operand->address); 137 break; 138 139 case SCR_IR_OPERAND_OFFSET: 140 dprintf(fd, "OFFSET{%" PRIi64 "}", operand->offset); 141 break; 142 } 143 144 dprintf(fd, ","); 145 } 146 147 dprintf(fd, "\n"); 148 } 149 } 150 151 typedef uint64_t reg_t; 152 153 struct vm_state { 154 reg_t pc, sp, acc; 155 156 reg_t r0; 157 158 struct { 159 unsigned char *ptr; 160 size_t len; 161 } stack, heap; 162 }; 163 164 static void 165 dump_vm_state(struct vm_state *vm) 166 { 167 fprintf(stderr, "vm state:\n"); 168 fprintf(stderr, "\tpc: 0x%" PRIx64 ", sp: 0x%" PRIx64 ", acc: 0x%" PRIx64 "\n", 169 vm->pc, vm->sp, vm->acc); 170 fprintf(stderr, "\tr0: 0x%" PRIx64 "\n", vm->r0); 171 } 172 173 static inline uint64_t 174 pop(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo) 175 { 176 uint64_t value = 0; 177 178 assert(typeinfo->size <= vm->sp); /* stack underrun */ 179 vm->sp -= typeinfo->size; 180 181 unsigned char *storage = vm->stack.ptr + vm->sp; 182 assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment)); 183 memcpy(&value, storage, typeinfo->size); 184 185 vm->acc = value; 186 187 return value; 188 } 189 190 static inline void 191 push(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uint64_t value) 192 { 193 assert(vm->sp + typeinfo->size < vm->stack.len); 194 195 unsigned char *storage = vm->stack.ptr + vm->sp; 196 assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment)); 197 memcpy(storage, &value, typeinfo->size); 198 199 // TODO: should push update the accumulator? 200 vm->acc = value; 201 202 vm->sp += typeinfo->size; 203 } 204 205 static inline void 206 load(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr) 207 { 208 uint64_t value = 0; 209 210 assert(addr + typeinfo->size < vm->heap.len); 211 unsigned char *storage = vm->heap.ptr + addr; 212 assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment)); 213 memcpy(&value, storage, typeinfo->size); 214 215 vm->acc = value; 216 217 push(vm, typeinfo, value); 218 } 219 220 static inline void 221 store(struct vm_state *vm, struct script_ir_typeinfo const *typeinfo, uintptr_t addr, uint64_t value) 222 { 223 assert(addr + typeinfo->size < vm->heap.len); 224 unsigned char *storage = vm->heap.ptr + addr; 225 assert(IS_ALIGNED((uintptr_t) storage, typeinfo->alignment)); 226 memcpy(storage, &value, typeinfo->size); 227 228 // TODO: should push update the accumulator? 229 vm->acc = value; 230 } 231 232 static inline uint64_t 233 arithmetic(enum script_ir_opcode opcode, struct script_ir_typeinfo const *typeinfo, 234 uint64_t lhs, uint64_t rhs) 235 { 236 #define map(lhs, rhs, op, Tin, Tout) \ 237 ((Tout) (((Tin) (lhs)) op ((Tin) (rhs)))) 238 239 #define vmop(op) \ 240 switch (typeinfo->type) { \ 241 case SCR_IR_TYPE_U8: res = map(lhs, rhs, op, uint8_t, uint64_t); \ 242 case SCR_IR_TYPE_U16: res = map(lhs, rhs, op, uint16_t, uint64_t); \ 243 case SCR_IR_TYPE_U32: res = map(lhs, rhs, op, uint32_t, uint64_t); \ 244 case SCR_IR_TYPE_U64: res = map(lhs, rhs, op, uint64_t, uint64_t); \ 245 case SCR_IR_TYPE_S8: res = map(lhs, rhs, op, int8_t, uint64_t); \ 246 case SCR_IR_TYPE_S16: res = map(lhs, rhs, op, int16_t, uint64_t); \ 247 case SCR_IR_TYPE_S32: res = map(lhs, rhs, op, int32_t, uint64_t); \ 248 case SCR_IR_TYPE_S64: res = map(lhs, rhs, op, int64_t, uint64_t); \ 249 case SCR_IR_TYPE_F32: res = map(lhs, rhs, op, float, uint64_t); \ 250 case SCR_IR_TYPE_F64: res = map(lhs, rhs, op, double, uint64_t); \ 251 case SCR_IR_TYPE_C8: res = map(lhs, rhs, op, uint8_t, uint64_t); \ 252 case SCR_IR_TYPE_PTR: res = map(lhs, rhs, op, uintptr_t, uint64_t); \ 253 } 254 255 uint64_t res; 256 switch (opcode) { 257 case SCR_IR_ADD: { 258 vmop(+) 259 } break; 260 case SCR_IR_SUB: { 261 vmop(-) 262 } break; 263 case SCR_IR_MUL: { 264 vmop(*) 265 } break; 266 case SCR_IR_DIV: { 267 vmop(/) 268 } break; 269 270 default: assert(0); 271 } 272 273 if (opts.verbose) 274 fprintf(stderr, "arith: %d, lhs: 0x%" PRIx64 ", rhs: 0x%" PRIx64 ", " 275 "res: 0x%" PRIx64 "\n", opcode, lhs, rhs, res); 276 277 return res; 278 } 279 280 static uint64_t 281 interpret(struct vm_state *vm, struct script_program *program) 282 { 283 while (vm->pc < program->instructions.len) { 284 struct script_ir_inst *inst = &program->instructions.ptr[vm->pc]; 285 286 if (opts.verbose) 287 fprintf(stderr, "inst: %s\n", script_ir_opcode_str(inst->opcode)); 288 289 switch (inst->opcode) { 290 case SCR_IR_LOAD: { 291 assert(inst->operand_count); 292 assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS); 293 load(vm, inst->typeinfo, inst->operands[0].address); 294 } break; 295 296 case SCR_IR_STORE: { 297 assert(inst->operand_count); 298 assert(inst->operands[0].type == SCR_IR_OPERAND_ADDRESS); 299 uint64_t value = pop(vm, inst->typeinfo); 300 store(vm, inst->typeinfo, inst->operands[0].address, value); 301 } break; 302 303 case SCR_IR_PUSH: { 304 assert(inst->operand_count); 305 assert(inst->operands[0].type == SCR_IR_OPERAND_LITERAL); 306 push(vm, inst->typeinfo, inst->operands[0].literal); 307 } break; 308 309 case SCR_IR_POP: { 310 assert(inst->operand_count == 0); 311 (void) pop(vm, inst->typeinfo); 312 } break; 313 314 case SCR_IR_RET: { 315 assert(inst->operand_count == 0); 316 vm->r0 = pop(vm, inst->typeinfo); 317 } break; 318 319 case SCR_IR_CMP: { 320 assert(inst->operand_count == 0); 321 uint64_t lhs = pop(vm, inst->typeinfo); 322 uint64_t rhs = pop(vm, inst->typeinfo); 323 vm->acc = lhs - rhs; 324 } break; 325 326 case SCR_IR_JMP: { 327 assert(inst->operand_count == 1); 328 assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET); 329 vm->pc += inst->operands[0].literal; 330 goto next_vm_iter; 331 } break; 332 333 case SCR_IR_JNE: { 334 assert(inst->operand_count == 1); 335 assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET); 336 if (vm->acc != 0) { 337 vm->pc += inst->operands[0].literal; 338 goto next_vm_iter; 339 } 340 } break; 341 342 case SCR_IR_JEQ: { 343 assert(inst->operand_count == 1); 344 assert(inst->operands[0].type == SCR_IR_OPERAND_OFFSET); 345 if (vm->acc == 0) { 346 vm->pc += inst->operands[0].literal; 347 goto next_vm_iter; 348 } 349 } break; 350 351 case SCR_IR_ADD: 352 case SCR_IR_SUB: 353 case SCR_IR_MUL: 354 case SCR_IR_DIV: { 355 assert(inst->operand_count == 0); 356 uint64_t rhs = pop(vm, inst->typeinfo); 357 uint64_t lhs = pop(vm, inst->typeinfo); 358 uint64_t res = arithmetic(inst->opcode, inst->typeinfo, lhs, rhs); 359 vm->acc = res; 360 push(vm, inst->typeinfo, res); 361 } break; 362 } 363 364 vm->pc++; 365 366 next_vm_iter: 367 if (opts.verbose) 368 dump_vm_state(vm); 369 } 370 371 return vm->r0; 372 } 373 374 int 375 main(int argc, char **argv) 376 { 377 if (parse_opts(argc, argv)) { 378 usage(argv[0]); 379 exit(EXIT_FAILURE); 380 } 381 382 void *compiler_mem = malloc(opts.compiler_mem); 383 assert(compiler_mem); 384 memset(compiler_mem, 0, opts.compiler_mem); 385 386 unsigned char *vm_stack_mem = malloc(opts.vm_stack_mem); 387 assert(vm_stack_mem); 388 389 unsigned char *vm_heap_mem = malloc(opts.vm_heap_mem); 390 assert(vm_heap_mem); 391 392 for (size_t i = 0; i < opts.sources.len; i++) { 393 char *source = opts.sources.ptr[i]; 394 FILE *fp = fopen(source, "r"); 395 if (!fp) { 396 fprintf(stderr, "Failed to open source file: %s\n", source); 397 continue; 398 } 399 400 fseek(fp, 0, SEEK_END); 401 size_t src_len = ftell(fp); 402 rewind(fp); 403 404 char *src = malloc(src_len); 405 assert(src); 406 407 size_t nbytes = fread(src, 1, src_len, fp); 408 assert(nbytes == src_len); 409 410 fclose(fp); 411 412 fprintf(stderr, "[%s] Read %zu bytes of source file\n", source, src_len); 413 414 struct script_program program; 415 if (script_compile(src, src_len, compiler_mem, opts.compiler_mem, 416 &program, opts.logfile, opts.verbose) < 0) { 417 fprintf(stderr, "Failed to compile source file: %s\n", source); 418 goto next_source; 419 } 420 421 fprintf(stderr, "[%s] Compiled program: %zu instructions, uses %zu bytes of heap\n", 422 source, program.instructions.len, program.max_heap_bytes); 423 424 if (opts.vm_heap_mem < program.max_heap_bytes) { 425 fprintf(stderr, "\tNot enough heap memory to run this program!\n"); 426 goto next_source; 427 } 428 429 if (opts.vm_emit_bytecode) { 430 char tmpfile_path[128] = "/tmp/bytecode-XXXXXX"; 431 432 int fd = mkstemp(tmpfile_path); 433 assert(fd > 0); 434 435 fprintf(stderr, "[%s] Emitting bytecode to file: %s\n", 436 source, tmpfile_path); 437 438 emit(fd, &program); 439 440 close(fd); 441 } 442 443 fprintf(stderr, "[%s] Interpreting with %zu bytes of stack, and %zu bytes of heap\n", 444 source, opts.vm_stack_mem, opts.vm_heap_mem); 445 446 struct vm_state vm; 447 memset(&vm, 0, sizeof vm); 448 449 memset(vm_stack_mem, 0, opts.vm_stack_mem); 450 vm.stack.ptr = vm_stack_mem; 451 vm.stack.len = opts.vm_stack_mem; 452 453 memset(vm_heap_mem, 0, opts.vm_heap_mem); 454 vm.heap.ptr = vm_heap_mem; 455 vm.heap.len = opts.vm_heap_mem; 456 457 if (opts.verbose) 458 dump_vm_state(&vm); 459 460 uint64_t res = interpret(&vm, &program); 461 462 fprintf(stderr, "[%s] Finished execution with value: 0x%" PRIx64 "\n", 463 source, res); 464 465 dump_vm_state(&vm); 466 467 next_source: 468 free(src); 469 } 470 471 exit(EXIT_SUCCESS); 472 }