Skip to content

Commit

Permalink
Input ports (#19) (#40)
Browse files Browse the repository at this point in the history
* Initial implementation of input ports (#19)

* Pack port structure into fewer bytes

* Cleanup C code for ports

* Reduce code duplicated from char.c

* remove TODO

* Fix bug in UTF-8 encoding of strings

The high bits of packed characters in a string were not zeroed before
encoding the character.

Co-authored-by: David Van Horn <[email protected]>
  • Loading branch information
john-h-kastner and dvanhorn authored Feb 23, 2021
1 parent e553e88 commit d3d5d0d
Show file tree
Hide file tree
Showing 15 changed files with 336 additions and 59 deletions.
2 changes: 1 addition & 1 deletion a86/ast.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@
(define offset? Offset?)

(define (register? x)
(and (memq x '(rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15))
(and (memq x '(rax rbx rcx rdx rbp rsp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15 al))
#t))

(define (label? x)
Expand Down
9 changes: 5 additions & 4 deletions villain/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@ objs = \
char.o \
io.o \
symbol.o \
str.o
str.o \
utf8.o

default: runtime.o

main.o: types.h runtime.h
char.o: types.h
io.o: runtime.h
main.o: types.h runtime.h char.h
char.o: types.h utf8.h char.h
io.o: runtime.h utf8.h char.h
symbol.o: str.h

%.run: %.o runtime.o
Expand Down
47 changes: 8 additions & 39 deletions villain/char.c
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#include <stdio.h>
#include <inttypes.h>
#include "types.h"

void print_codepoint(int64_t);
void print_str(int64_t *);
void print_str_char(int64_t);
#include "utf8.h"
#include "char.h"

void print_char (int64_t v) {
int64_t codepoint = v >> char_shift;
Expand Down Expand Up @@ -33,45 +31,16 @@ void print_char (int64_t v) {
}
}

void print_codepoint(int64_t v) {
int64_t codepoint = v >> char_shift;
// Print using UTF-8 encoding of codepoint
// https://en.wikipedia.org/wiki/UTF-8
if (codepoint < 128) {
printf("%c", (char) codepoint);
} else if (codepoint < 2048) {
printf("%c%c",
(char)(codepoint >> 6) | 192,
((char)codepoint & 63) | 128);
} else if (codepoint < 65536) {
printf("%c%c%c",
(char)(codepoint >> 12) | 224,
((char)(codepoint >> 6) & 63) | 128,
((char)codepoint & 63) | 128);
} else {
printf("%c%c%c%c",
(char)(codepoint >> 18) | 240,
((char)(codepoint >> 12) & 63) | 128,
((char)(codepoint >> 6) & 63) | 128,
((char)codepoint & 63) | 128);
}
/*
 * Fetch one codepoint from a packed string.
 *
 * Word 0 of `str` is skipped (print_str reads the length from it); every
 * following 64-bit word carries three 21-bit codepoints, lowest bits first.
 * `codepoint_idx` is the zero-based position of the wanted codepoint.
 * Bits above the selected 21-bit field are masked off before returning.
 */
int32_t get_str_codepoint(int64_t *str, int64_t codepoint_idx) {
  int word = 1 + codepoint_idx / 3;   /* which payload word holds it */
  int slot = codepoint_idx % 3;       /* which of the three fields   */
  int64_t packed = str[word];
  return (packed >> (slot * 21)) & 0x1FFFFF;
}

void print_str(int64_t *str) {
int64_t len = (str[0] >> int_shift);
int temp;
int i, j;
int n = (len % 3 == 0) ? len / 3 : (len / 3 + 1);
for (i = 1; i < n; i++) {
for (j = 0; j < 3; j++) {
temp = str[i] >> (j * 21);
print_str_char(temp);
}
}
i = (len % 3 == 0) ? 3 : (len % 3);
for (j = 0; j < i; j++){
temp = str[n] >> (j * 21);
print_str_char(temp);
for (int64_t i = 0 ; i < len; i++) {
print_str_char(get_str_codepoint(str, i));
}
}

Expand Down
10 changes: 10 additions & 0 deletions villain/char.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#ifndef VILLAIN_CHAR_H
#define VILLAIN_CHAR_H

/* Prototypes for the string/character helpers implemented in char.c. */

#include <inttypes.h>

/* Print a packed string: the length is read from word 0 and each codepoint
 * is handed to print_str_char in order. */
void print_str(int64_t *);
/* Print a single codepoint taken from a string.
 * NOTE(review): definition is not shown alongside this header — confirm the
 * exact escaping/encoding it applies. */
void print_str_char(int64_t);
/* Return the codepoint at zero-based position `codepoint_idx` of the packed
 * string `str` (three 21-bit codepoints per word, starting at word 1). */
int32_t get_str_codepoint(int64_t *str, int64_t codepoint_idx);

#endif
90 changes: 84 additions & 6 deletions villain/compile.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@
; and string-set!

(define r8 'r8) ; scratch in +, -, compile-chars, compile-prim2, string-ref,
; make-string, compile-prim3, string-ref!, integer-length, match,
; compile-define
; make-string, compile-prim3, string-ref!, integer-length, match,
; compile-define, open-input-file
(define r9 'r9) ; scratch in assert-type, compile-str-chars, string-ref,
; string-set!, make-string, compile-define, compile-fl+
; compile-vector, vector-set!, vector-ref
(define rsp 'rsp) ; stack
(define rdi 'rdi) ; arg
(define r10 'r10) ; scratch in compile-prim3, make-string, string-set!, compile-define, compile-fl+
; compile-vector, vector-set!
(define rsi 'rsi) ; arg2
(define r10 'r10) ; scratch in compile-prim3, make-string, string-set!, compile-vector, vector-set!
; compile-define, compile-fl+
(define r11 'r11) ; scratch in compile-fl+
(define r12 'r12) ; scratch in compile-fl+
(define r13 'r13) ; scratch in compile-fl+
(define rcx 'rcx) ; arity indicator
(define al 'al) ; low byte of rax ; open-input-file

;; type CEnv = [Listof Variable]

Expand Down Expand Up @@ -410,7 +412,7 @@
(Xor rax type-string)
(Mov rax (Offset rax 0)))]
['string?
(type-pred ptr-mask type-string)]
(type-pred ptr-mask type-string)]
['string->symbol
(seq (assert-string rax c)
(Xor rax type-string)
Expand All @@ -424,8 +426,82 @@
(Xor rax type-symbol) ; replace symbol tag with str
(Or rax type-string))]
['symbol?
(type-pred ptr-mask type-symbol)]
(type-pred ptr-mask type-symbol)]
['empty? (eq-imm val-empty)]
['port?
(type-pred ptr-mask type-port)]
['open-input-file
(seq
(assert-string rax c)

;; Save the current heap pointer as the second argument of the C call;
;; the C side writes the encoded file name there.
(Mov rsi rbx)
;; Untag the string pointer (the C function receives it untagged)
(Xor rax type-string)
;; r8 <- chars in input string
(Mov r8 (Offset rax 0))
;; (r8 * 4) + 1 is upper bound on bytes
(Sar r8 int-shift)
(Sal r8 2)
(Add r8 1)
;; Round the reservation up to a multiple of 8 to keep the heap aligned,
;; then bump the heap pointer past the c-string buffer
(Or r8 7)
(Add r8 1)
(Add rbx r8)

;; Call to C function that opens file
(pad-stack c)
(Mov rdi rax)
(Call 'open_input_file)
(unpad-stack c)
;; rax now contains a FILE *

;; struct Port {
;; FILE *file;
;; int8_t buffer_len;
;; int8_t buffer_offset;
;; int8_t buffer_closed;
;; int8_t buffer[port-buffer-bytes];
;; };
(Mov r8 rbx)
(Mov (Offset rbx 0) rax) ;; Store file pointer on heap
(Xor al al) ;; Zero the low byte of rax; the full pointer is already stored
(Mov (Offset rbx 8) al) ;; Store number of buffered bytes (buffer_len)
(Mov (Offset rbx 9) al) ;; Store offset into buffer (buffer_offset)
(Mov (Offset rbx 10) al) ;; Store "closed" flag
;; Advance heap pointer, allocating space for a buffer
;; Choose actual space allocated based on declared
;; port-buffer-bytes and the 11 bytes used by the rest of the structure
;; to maintain heap alignment.
(Add rbx (+ (- 8 (modulo (+ 11 port-buffer-bytes) 8)) 11
port-buffer-bytes))
;; Result: pointer to the port record, tagged as a port
(Mov rax r8)
(Or rax type-port)
)]
['close-input-port
(seq
(assert-port rax c)
(pad-stack c)
(Mov rdi rax)
(Call 'close_input_port)
(unpad-stack c)
(Mov rax val-void))]
['read-byte
(seq
(assert-port rax c)
(pad-stack c)
(Mov rdi rax)
(Call 'read_byte_port)
(unpad-stack c)
)]
['peek-byte
(seq
(assert-port rax c)
(pad-stack c)
(Mov rdi rax)
(Call 'peek_byte_port)
(unpad-stack c)
)]
['vector? (type-pred ptr-mask type-vector)]
['vector-length
(seq (assert-vector rax c)
Expand Down Expand Up @@ -1088,6 +1164,8 @@
(assert-type ptr-mask type-string))
(define assert-symbol
(assert-type ptr-mask type-symbol))
;; Type assertion for port values, built by assert-type from ptr-mask and
;; type-port; the port primitives apply it as (assert-port rax c).
(define assert-port
(assert-type ptr-mask type-port))
(define assert-vector
(assert-type ptr-mask type-vector))

Expand Down
11 changes: 8 additions & 3 deletions villain/externs.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -77,18 +77,23 @@

(define (externs-p p)
(let ((r (op->extern p)))
(if r (list (Extern r)) '())))
(match r
[#f '()]
[(? list?) (map (λ (e) (Extern e)) r)]
[_ (list (Extern r))])))

(define (op->extern o)
(match o
['peek-byte 'peek_byte]
['peek-byte '(peek_byte peek_byte_port)]
['read-byte '(read_byte read_byte_port)]
['peek-char 'peek_char]
['read-byte 'read_byte]
['read-char 'read_char]
['write-byte 'write_byte]
['write-char 'write_char]
['gensym 'gensym]
#;['string->symbol 'str_to_symbol] ;; always included now
['open-input-file 'open_input_file]
['close-input-port 'close_input_port]
[_ (char-op->uc o)]))

(define (char-op->uc o)
Expand Down
11 changes: 11 additions & 0 deletions villain/interp-prims.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@
[(list 'string->symbol (? string?)) (string->symbol v)]
[(list 'symbol->string (? symbol?)) (symbol->string v)]
[(list 'symbol? v) (symbol? v)]
[(list 'port? v) (port? v)]
[(list 'open-input-file (? string?)) (with-handlers
([exn:fail:filesystem:errno? (λ (_) 'err)])
(open-input-file v))]
[(list 'close-input-port (? port?)) (close-input-port v)]
[(list 'read-byte
(and (? port?)
(not (? port-closed?)))) (read-byte v)]
[(list 'peek-byte
(and (? port?)
(not (? port-closed?)))) (peek-byte v)]
[(list 'flonum? v) (flonum? v)]
[(list 'vector? v) (vector? v)]
[(list 'vector-length v) (vector-length v)]
Expand Down
66 changes: 66 additions & 0 deletions villain/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,72 @@
#include <wchar.h>
#include "types.h"
#include "runtime.h"
#include "utf8.h"

/*
 * Accessors for a port record.
 *
 * Layout, as implied by the accessors below:
 *   bytes 0-7 : FILE * of the underlying stream
 *   byte  8   : number of valid bytes currently in the buffer
 *   byte  9   : offset of the next unread byte in the buffer
 *   byte  10  : non-zero once the port has been closed
 *   byte  11+ : read buffer of port_buffer_bytes bytes
 *
 * `p` is an int64_t * to the start of the record, except for untag_port,
 * which takes the tagged value. Every argument is parenthesized so that
 * expressions such as `base + 1` expand correctly.
 */
#define untag_port(p) ((int64_t *) ((p) ^ port_type_tag))
#define port_file(p) ((FILE *) (p)[0])
#define port_buffer_len(p) (((int8_t *) ((p) + 1))[0])
#define port_buffer_offset(p) (((int8_t *) ((p) + 1))[1])
#define port_closed(p) (((int8_t *) ((p) + 1))[2])
#define port_buffer(p) (((int8_t *) ((p) + 1)) + 3)
#define port_buffer_bytes 8

/*
 * Open the file named by a runtime string for reading.
 *
 * untagged_str: address of the string with its type tag ALREADY removed
 *               (the generated assembly untags it before the call).
 * buffer:       caller-provided scratch space that receives the encoded
 *               file name; the caller must have reserved enough room.
 *
 * Returns the opened stream. If the file cannot be opened, error_handler()
 * is invoked. NOTE(review): this presumably does not return — confirm;
 * otherwise a NULL stream is handed back to the caller.
 */
FILE *open_input_file(int64_t untagged_str, char *buffer) {
  utf8_encode_string((int64_t *) untagged_str, buffer);
  /* Ports are consumed byte-by-byte, so open in binary mode to prevent any
   * text-mode translation on platforms that distinguish the two. */
  FILE *f = fopen(buffer, "rb");
  if (f == NULL) {
    error_handler();
  }
  return f;
}

/*
 * Close the stream behind a tagged port value and mark the port closed.
 * Closing a port that is already marked closed does nothing.
 */
void close_input_port(int64_t port_val) {
  int64_t *port = untag_port(port_val);
  if (port_closed(port)) {
    return;
  }
  fclose(port_file(port));
  port_closed(port) = 1;
}

/*
 * Ensure the port's buffer holds at least one unread byte.
 *
 * When every buffered byte has been consumed, refill the buffer from the
 * underlying stream and reset the read offset. Returns 1 if an unread byte
 * is available, 0 if the refill produced no bytes.
 */
int populate_buffer(int64_t *port) {
  if (port_buffer_offset(port) < port_buffer_len(port)) {
    return 1; /* unread bytes remain; nothing to do */
  }
  int64_t fetched = fread(port_buffer(port), sizeof(int8_t), port_buffer_bytes, port_file(port));
  port_buffer_offset(port) = 0;
  port_buffer_len(port) = fetched;
  return fetched > 0;
}

/*
 * Read and consume the next byte from a tagged port value.
 *
 * Returns the byte (0-255) shifted left by int_shift, or val_eof when no
 * more bytes can be read. error_handler() is invoked when the port has
 * been closed. NOTE(review): this presumably does not return — confirm.
 */
int64_t read_byte_port(int64_t port_val) {
  int64_t *port = untag_port(port_val);
  if (port_closed(port)) {
    error_handler();
  }
  if (!populate_buffer(port)) {
    return val_eof;
  }
  /* Treat the byte as unsigned: going through a signed 8-bit value would
   * turn 0x80-0xFF into negative numbers and left-shift a negative value. */
  uint8_t byte = (uint8_t) port_buffer(port)[port_buffer_offset(port)];
  port_buffer_offset(port)++;
  return ((int64_t) byte) << int_shift;
}

/*
 * Return the next byte from a tagged port value without consuming it.
 *
 * Returns the byte (0-255) shifted left by int_shift, or val_eof when no
 * more bytes can be read. error_handler() is invoked when the port has
 * been closed. NOTE(review): this presumably does not return — confirm.
 */
int64_t peek_byte_port(int64_t port_val) {
  int64_t *port = untag_port(port_val);
  if (port_closed(port)) {
    error_handler();
  }
  if (!populate_buffer(port)) {
    return val_eof;
  }
  /* Treat the byte as unsigned: going through a signed 8-bit value would
   * turn 0x80-0xFF into negative numbers and left-shift a negative value. */
  uint8_t byte = (uint8_t) port_buffer(port)[port_buffer_offset(port)];
  return ((int64_t) byte) << int_shift;
}

int64_t read_byte(void) {
char c = getc(in);
Expand Down
4 changes: 3 additions & 1 deletion villain/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <stdlib.h>
#include "types.h"
#include "runtime.h"
#include "char.h"
#include <math.h>

FILE* in;
Expand All @@ -17,7 +18,6 @@ int64_t *heap;

void print_result(int64_t);
void print_vector(int64_t);
void print_str(int64_t *);

void error_exit() {
printf("err\n");
Expand Down Expand Up @@ -79,6 +79,8 @@ void print_result(int64_t result) {
} else if (symbol_type_tag == (ptr_type_mask & result)) {
printf("'");
print_str((int64_t *)(result ^ symbol_type_tag));
} else if (port_type_tag == (ptr_type_mask & result)) {
printf("#<input-port>");
} else {
switch (result) {
case val_true:
Expand Down
5 changes: 4 additions & 1 deletion villain/parse.rkt
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,10 @@
string-length string? integer?
flonum?
symbol->string string->symbol symbol?
vector? vector-length))
vector? vector-length
string-length string?
close-input-port open-input-file port? read-byte peek-byte))

(define op2
'(+ - quotient remainder eq? cons string-ref make-string <=
make-vector vector-ref
Expand Down
Loading

0 comments on commit d3d5d0d

Please sign in to comment.