code: 9ferno

Download patch

ref: 7c4e9c492a69a964b6873a6f5e3a90fcc5ece23a
parent: e5bbed7675745f8887541979900361f14f727817
author: 9ferno <[email protected]>
date: Tue Nov 16 08:46:23 EST 2021

trying out a forth interpreter

--- /dev/null
+++ b/os/pc64/bindings.s
@@ -1,0 +1,88 @@
+
+/*
+plan9 assembler puts the first argument in R8 (RARG), return value in AX.
+	Caller saves registers - plan9 convention
+	not maintaining the values of the temporary registers or EFLAGS
+*/
+
+/*
+using
+	( 2nd_parameter 1st_parameter -- )	mode fd open
+		simpler code, more comprehensible with the 1st arg next to the call - my opinion
+instead of
+	( 1st_parameter 2nd_parameter -- ) 	fd mode open
+might revisit this later after a trial
+
+there is no forth word for this. It is call'able by the bindings subroutines
+*/
+TEXT	ff_to_c(SB), 1, $-4	/* ( argn .. arg2 arg1 nargs -- ) (G move args to C stack) */
+	POPQ SI			/* get the return PC from the stack */
+	MOVQ TOS, CX	/* CX = nargs, also the LOOP counter below */
+	POP(TOS)		/* TOS = arg1 when nargs > 0 */
+	TESTQ CX, CX	/* ZF only when nargs == 0 (TESTQ $0, CX sets ZF always) */
+	JZ .ff_to_c_done /* no args */
+	MOVQ TOS, RARG	/* 1st argument is put in RARG also; NOTE(review): the comments in this patch name R8 as both RARG and RSP, if so RSP is lost before PUSH(RSP) below - confirm */
+.ff_to_c_again:
+	PUSHQ TOS		/* copy one argument to the hardware stack */
+	POP(TOS)		/* next argument, or the remaining top of stack after the last one */
+	LOOP .ff_to_c_again	/* CX--, repeat while CX != 0 */
+.ff_to_c_done:
+	PUSH(TOS)		/* save the forth registers on the parameter stack */
+	PUSH(RSP)
+	PUSH(IP)
+	PUSH(W)			/* c_to_ff_0 pops these in the reverse order */
+	MOVQ PSP, ffsp(SB);	/* remember the parameter stack pointer across the C call */
+	JMP* SI /* go back to the caller */
+
+TEXT	c_to_ff_0(SB), 1, $-4	/* no returned argument; NOTE(review): the arguments ff_to_c pushed on the hardware stack are not removed here - confirm who drops them */
+	MOVQ ffsp(SB), PSP	/* parameter stack pointer saved by ff_to_c */
+	POP(W)			/* restore in the reverse of the order ff_to_c pushed */
+	POP(IP)
+	POP(RSP)
+	POP(TOS)
+	RET
+TEXT	c_to_ff_1(SB), 1, $-4	/* there is a returned argument */
+	CALL c_to_ff_0(SB)	/* restore W, IP, RSP and TOS saved by ff_to_c */
+	PUSH(TOS)		/* keep the restored top of stack below the result */
+	MOVQ AX, TOS	/* C puts the return value in AX */
+	RET
+
+TEXT	open(SB), 1, $-4	/* ( mode cstr -- fd ) */
+	PUSH(TOS)		/* spill cstr so that both arguments sit on the parameter stack */
+	MOVQ $2, TOS	/* nargs = 2 for ff_to_c */
+	CALL ff_to_c(SB)	/* copy the arguments to the hardware stack, save the forth registers */
+	CALL kopen(SB)
+	CALL c_to_ff_1(SB)	/* restore the forth registers, AX becomes the new TOS */
+	NEXT
+
+TEXT	close(SB), 1, $-4	/* ( fd -- n ) */
+	PUSH(TOS)		/* spill fd so that the argument sits on the parameter stack */
+	MOVQ $1, TOS	/* nargs = 1 for ff_to_c */
+	CALL ff_to_c(SB)	/* copy the argument to the hardware stack, save the forth registers */
+	CALL kclose(SB)	/* (SB) added: every other call here names its target relative to SB */
+	CALL c_to_ff_1(SB)	/* restore the forth registers, AX becomes the new TOS */
+	NEXT
+
+TEXT	read(SB), 1, $-4	/* ( n a fd -- n2 ) */
+	PUSH(TOS)		/* spill fd so that all three arguments sit on the parameter stack */
+	MOVQ $3, TOS	/* nargs = 3 for ff_to_c */
+	CALL ff_to_c(SB)	/* copy the arguments to the hardware stack, save the forth registers */
+	CALL kread(SB)
+	CALL c_to_ff_1(SB)	/* restore the forth registers, AX becomes the new TOS */
+	NEXT
+
+TEXT	write(SB), 1, $-4	/* ( n a fd -- n2 ) */
+	PUSH(TOS)		/* spill fd so that all three arguments sit on the parameter stack */
+	MOVQ $3, TOS	/* nargs = 3 for ff_to_c */
+	CALL ff_to_c(SB)	/* copy the arguments to the hardware stack, save the forth registers */
+	CALL kwrite(SB)
+	CALL c_to_ff_1(SB)	/* restore the forth registers, AX becomes the new TOS */
+	NEXT
+
+TEXT	seek(SB), 1, $-4	/* ( type pos fd -- n ) */
+	PUSH(TOS)		/* spill fd so that all three arguments sit on the parameter stack */
+	MOVQ $3, TOS	/* nargs = 3 for ff_to_c */
+	CALL ff_to_c(SB)	/* copy the arguments to the hardware stack, save the forth registers */
+	CALL kseek(SB)
+	CALL c_to_ff_1(SB)	/* restore the forth registers, AX becomes the new TOS */
+	NEXT
--- a/os/pc64/ff.s
+++ b/os/pc64/ff.s
@@ -12,6 +12,7 @@
 
  Register usage:
 
+	Original usage
  TOS: AX top of stack register
  SP:  SP parameter stack pointer, grows towards lower memory (downwards)
  RP:  BP (= RARG) return stack pointer, grows towards higher memory (upwards)
@@ -21,6 +22,22 @@
 
 plan9 assembler puts the first argument in BP (RARG), return value in AX.
 
+	Changed to
+ Leaving AX, SP, BP (RARG) alone to not mess with the C environment
+
+ TOS: BX top of stack register
+ PSP: DX parameter stack pointer, grows towards lower memory (downwards)
+ RSP: R8 return stack pointer, grows towards higher memory (upwards)
+ IP:  R9 instruction pointer
+ W:   R10 work register (holds CFA)
+	CX, SI, DI, R11-R15 temporary registers
+
+coding standard
+: <name> (S input-stack --- output-stack) (R --- )
+	(G descriptive glossary text)
+	f1 f2  ( interim stack picture) \ programmers explanatory comment
+		.. fn ;
+
 Memory map:
 Return stack 4096 bytes at FFSTART
 	|
@@ -51,31 +68,6 @@
  */
 #include "primitives.s"
 
-/*
-plan9 assembler puts the first argument in BP (RARG), return value in AX.
-	For calling a C function with a parameter:
-		Store AX somewhere
-		POPQ AX
-		PUSHA
-		Store ff's SP
-		Restore C's SP
-		POPA	-- this should not be needed as C is caller save
-		MOVQ from somewhere to BP
-		-- call the c function
-	For calling a C function without a parameter:
-		PUSHA
-		Store ff's SP
-		Restore C's SP
-		POPA	-- this should not be needed as C is caller save
-		-- call the c function
-	For coming back from a C function: -- ignoring the return value
-		PUSHA	-- this should not be needed as C is caller save
-		Store C's SP
-		Restore ff's SP
-		POPA
-	ignoring the EFLAGS register, for now
-	not bothering with maintaining the values of the temporary registers
-*/
 #define PUSHALL \
 	PUSHQ	R13; \
 	PUSHQ	R12; \
@@ -83,22 +75,22 @@
 	PUSHQ	R10; \
 	PUSHQ	R9; \
 	PUSHQ	R8; \
-	PUSHQ	BP; \
-	PUSHQ	DI; \
-	PUSHQ	SI; \
+	PUSHQ	R8; \
+	PUSHQ	R10; \
+	PUSHQ	R9; \
 	PUSHQ	DX; \
 	PUSHQ	CX; \
 	PUSHQ	BX; \
-	PUSHQ	AX;
+	PUSHQ	TOS;
 #define POPALL \
-	POPQ	AX; \
+	POPQ	TOS; \
 	POPQ	BX; \
 	POPQ	CX; \
 	POPQ	DX; \
-	POPQ	SI; \
-	POPQ	DI; \
-	POPQ	BP; \
+	POPQ	R9; \
+	POPQ	R10; \
 	POPQ	R8; \
+	POPQ	R8; \
 	POPQ	R9; \
 	POPQ	R10; \
 	POPQ	R11; \
@@ -105,47 +97,47 @@
 	POPQ	R12; \
 	POPQ	R13;
 #define PUSHREGS \
-	PUSHQ	BP; \
-	PUSHQ	DI; \
-	PUSHQ	SI; \
-	PUSHQ	AX;
+	PUSHQ	R8; \
+	PUSHQ	R10; \
+	PUSHQ	R9; \
+	PUSHQ	TOS;
 #define POPREGS \
-	POPQ	AX; \
-	POPQ	SI; \
-	POPQ	DI; \
-	POPQ	BP; \
+	POPQ	TOS; \
+	POPQ	R9; \
+	POPQ	R10; \
+	POPQ	R8;
 
 #define FF_TO_C_0 \
 	PUSHREGS; \
-	MOVQ SP, ffsp<>(SB); \
-	MOVQ csp<>(SB), SP; \
+	MOVQ DX, ffsp<>(SB); \
+	MOVQ csp<>(SB), DX; \
 	POPREGS;
 
 #define FF_TO_C_1 \
-	MOVQ AX, BX; \
-	POPQ AX; /* drop AX from the parameter stack */ \
+	MOVQ TOS, BX; \
+	POPQ TOS; /* drop TOS from the parameter stack */ \
 	FF_TO_C_0 \
-	MOVQ BX, BP; /* 1st argument in BP == RARG */
+	MOVQ BX, R8; /* 1st argument in R8 == RARG */
 
 /* ( 1st_parameter 2nd_parameter -- ) */
 #define FF_TO_C_2 /* for calling a c function with 2 parameters */ \
-	MOVQ AX, CX; \
-	POPQ AX; \
+	MOVQ TOS, CX; \
+	POPQ TOS; \
 	FF_TO_C_1 \
-	MOVQ CX, 8(SP) \
+	MOVQ CX, 8(DX) \
 
 /* ( 1st_parameter 2nd_parameter 3rd_parameter -- ) */
 #define FF_TO_C_3 /* for calling a c function with 3 parameters */ \
-	MOVQ AX, DX; \
-	POPQ AX; \
+	MOVQ TOS, DX; \
+	POPQ TOS; \
 	FF_TO_C_2 \
-	MOVQ DX, 16(SP) \
+	MOVQ DX, 16(DX) \
 
 /* no arguments when calling ff from C, for now */
 #define C_TO_FF \
 	PUSHREGS; \
-	MOVQ SP, csp<>(SB); \
-	MOVQ ffsp<>(SB), SP; \
+	MOVQ DX, csp<>(SB); \
+	MOVQ ffsp<>(SB), DX; \
 	POPREGS;
 
 TEXT	ffmain(SB), 1, $-4		/* _main(SB), 1, $-4 without the libc */
@@ -155,76 +147,86 @@
 	 * (link + name(1+2) + code field address = 24 bytes) of the dp
 	 * dictionary entry.
 	 */
-	PUSHREGS
-	MOVQ SP, csp<>(SB); /* store C stack pointer */
-	MOVQ $FFEND, SP	/* setting up stack */
+	MOVQ $FFEND, PSP	/* setting up stack */
 	/*
 	 * dtop address is stored in the parameter field address(24-32 bytes) of mventry_dp
 	 */
-	MOVQ mventry_dp+24(SB), BX	/* now, BX = dtop address */
-	MOVQ (BX), AX	/* AX = *BX = $LAST = boot word address (defined last, stored at dtop) */
+	MOVQ mventry_dp+24(SB), SI	/* now, SI = dtop address */
+	MOVQ (SI), TOS	/* TOS = *SI = $LAST = boot word address (defined last, stored at dtop) */
 				/* if 6a allows multiple symbols per address, then 
 					the above 3 instructions would have been
-					MOVQ (($mventry_dp+24(SB))), AX */
+					MOVQ (($mventry_dp+24(SB))), TOS */
 	/*
 	 * Could do this instead of the calculations below
-	 * LEAQ 24(AX), SI
+	 * LEAQ 24(TOS), IP
 	 */
-	ADDQ $16, AX	/* AX += link (8 bytes) + len (1 byte) + minimum for align to 8 bytes */
+	ADDQ $16, TOS	/* TOS += link (8 bytes) + len (1 byte) + minimum for align to 8 bytes */
 	XORQ CX, CX
-	MOVB 8(BX), CL	/* CL = length of boot name */
-	ADDQ CX, AX		/* AX += len */
-	ANDQ $~7, AX	/* AX = address of boot's code - 8 bytes */
-	LEAQ 8(AX), SI	/* SI = L257 = start of boot code = has docol address there
-					 * skipping over docol as we do not need to save the SI
-					 * could have done LEAQ 24(AX), SI
+	MOVB 8(SI), CL	/* CL = length of boot name */
+	ADDQ CX, TOS		/* TOS += len */
+	ANDQ $~7, TOS	/* TOS = address of boot's code - 8 bytes */
+	LEAQ 8(TOS), IP	/* IP = L257 = start of boot code = has docol address there
+					 * skipping over docol as we do not need to save the IP
 					 */
 
 /* lodsl could make this simpler. But, this is more comprehensible
-	why not JMP* (DI)?
+	why not JMP* (W)?
  */
-#define NEXT	MOVQ (SI), DI; \
-		ADDQ $8, SI; \
-		MOVQ (DI), BX; \
-		JMP* BX;
+/*
+NEXT is the inner interpreter: load the code field address that IP points
+at into W, step IP to the following cell and jump to the machine code whose
+address is stored at that code field address.
+CX (a temporary register) holds the jump target so that TOS is preserved.
+Address   0     8    16
+aword : docol  40   ...
+Address   40    48
+bword : docol   72
+Address   72    80
+cword : docol  ....
+at docol address, some assembly instruction
+Assume IP = 8
+ */
+#define NEXT	MOVQ (IP), W;	/* W = 40, contents of address in IP, some word's code field address */ \
+		MOVQ (W), CX;	/* CX = docol, the code address stored at the code field address */ \
+		ADDQ $8, IP; 	/* move IP further, IP = 16 */ \
+		JMP* CX; /* Start executing at docol address, JMP* = jump to a non-relative address */
 
-	NEXT
+#define PUSH(r)	SUBQ $8, PSP; \
+			MOVQ r, (PSP)
+#define POP(r)	MOVQ (PSP), r; \
+			ADDQ $8, PSP
 
-TEXT	ffprint(SB), 1, $-4
-	FF_TO_C_2
-	CALL screenput(SB)
-	C_TO_FF
 	NEXT
 
 TEXT	reset(SB), 1, $-4
-	MOVQ $FFSTART, BP
+	MOVQ $FFSTART, RSP
 	NEXT
 
 TEXT	clear(SB), 1, $-4
-	MOVQ $FFEND, SP
+	MOVQ $FFEND, PSP
 	NEXT
 
 TEXT	colon(SB), 1, $-4
-	MOVQ SI,(BP)
-	ADDQ $8, BP
-	LEAQ 8(DI), SI
+	MOVQ IP,(RSP)
+	ADDQ $8, RSP
+	LEAQ 8(W), IP
 	NEXT
 
 TEXT	exitcolon(SB), 1, $-4
-	SUBQ $8, BP
-	MOVQ (BP), SI
+	SUBQ $8, RSP
+	MOVQ (RSP), IP
 	NEXT
 
 TEXT	dodoes(SB), 1, $-4	/* ( -- a ) */
-	MOVQ SI,(BP)
-	ADDQ $8,BP
-	MOVQ 8(DI),SI
-	PUSHQ AX
-	LEAQ 16(DI), AX
+	MOVQ IP,(RSP)
+	ADDQ $8,RSP
+	MOVQ 8(W),IP
+	PUSH(TOS)
+	LEAQ 16(W), TOS
 	NEXT
 
 TEXT	jump(SB), 1, $-4	/* ( -- ) */
-	MOVQ (SI),SI
+	MOVQ (IP),IP
 	NEXT
 
 /* ( f -- ) cjump address
@@ -231,50 +233,53 @@
 	if true, skip the address and continue
 	else, go to the address */
 TEXT	cjump(SB), 1, $-4	/* ( f -- ) */
-	MOVQ (SI), BX	/* get the next address */
-	ADDQ $8, SI	/* move esi beyond that */
-	TESTQ AX, AX
+	MOVQ (IP), CX	/* CX = branch target stored in the cell after cjump */
+	ADDQ $8, IP	/* move IP beyond that cell */
+	TESTQ TOS, TOS	/* test the flag f, still intact in TOS */
 	JNZ .l1		/* if true, move along */
-	MOVQ BX, SI	/* if false, go to the above address */
+	MOVQ CX, IP	/* if false, go to the above address */
 .l1:
-	POPQ AX
+	POP(TOS)
 	NEXT
 
+/* TODO change to allow only fetches from a certain memory range */
 TEXT	fetch(SB), 1, $-4	/* ( a -- n) */
-	MOVQ (AX), AX
+	MOVQ (TOS), TOS
 	NEXT
 
+/* TODO change to allow stores to a certain memory range only */
 TEXT	store(SB), 1, $-4	/* ( n a -- ) */
-	POPQ (AX)
-	POPQ AX
+	POP(CX)
+	MOVQ CX, (TOS)
+	POP(TOS)
 	NEXT
 
 TEXT	cfetch(SB), 1, $-4	/* ( a -- c ) */
-	XORQ BX, BX
-	MOVB (AX), BL
-	MOVQ BX, AX
+	XORQ CX, CX	/* clear CX so the byte ends up zero extended */
+	MOVB (TOS), CL	/* CL = byte at address a */
+	MOVQ CX, TOS	/* c replaces a as the top of stack */
 	NEXT
 
 TEXT	cstore(SB), 1, $-4	/* ( c a -- ) */
-	POPQ BX
-	MOVB BL, (AX)
-	POPQ AX
+	POP(CX)
+	MOVB CL, (TOS)
+	POP(TOS)
 	NEXT
 
 TEXT	terminate(SB), 1, $-4	/* ( n -- ) */
-	XORQ BX, BX
-	TESTQ AX, AX
+	XORQ CX, CX	/* CX = 0, the status used when n == 0 */
+	TESTQ TOS, TOS
 	JZ .l2
-	MOVQ $failtext(SB), BX
+	MOVQ $failtext(SB), CX	/* n != 0: CX = address of failtext, stored as the status below */
 .l2:
-	/* PUSHQ BX */
-	/* SUBQ $8, SP */	/* dummy retaddr */
-	MOVQ BX, a0+0(FP)	/* address of exit status? status = nil? */
+	/* PUSHQ CX */
+	/* SUBQ $8, PSP */	/* dummy retaddr */
+	MOVQ CX, a0+0(FP)	/* address of exit status? status = nil? */
 	MOVQ $8, RARG	/* EXITS */
-	SYSCALL		/* syscall for exit */
+	SYSCALL		/* TODO syscall for exit */
 
 TEXT	testfsopen(SB), 1, $-4
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
+	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
 	PUSHQ BP
 	PUSHQ $0	/* OREAD */
 	PUSHQ $name(SB)
@@ -281,9 +286,9 @@
 	PUSHQ $0	/* dummy retaddr */
 	MOVQ $14, RARG	/* open */
 	SYSCALL
-	ADDQ $24, SP
-	POPQ BP
-	POPQ SI
+	ADDQ $24, SP	/* drop the 3 words pushed with PUSHQ on the hardware stack */
+	POPQ BP		/* pops mirror the PUSHQ BP / PUSHQ SI at the top */
+	POPQ SI
 	NEXT
 	NOP
 	NOP
@@ -291,187 +296,45 @@
 	NOP
 	NOP
 
-/* man errstr */
-TEXT	errstr(SB), 1, $-4
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	PUSHQ AX
+#include "bindings.s"
 
-	PUSHQ $128	/* size */
-	PUSHQ $errstrbuffer(SB) /* buf */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $41, RARG	/* errstr */
-	SYSCALL
-	ADDQ $24, SP
-
-	MOVQ $-1, BX	/* -1LL (seek pos) */
-	PUSHQ BX	/* offset */
-	PUSHQ $128	/* size, could use c's strlen for the exact size */
-	PUSHQ $errstrbuffer(SB) /* buf */
-	PUSHQ $2	/* assuming that stderr = 2 */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $51, RARG	/* PWRITE */
-	SYSCALL
-	ADDQ $40, SP
-
-	POPQ AX
-	POPQ BP
-	POPQ SI
-	NEXT
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-
-TEXT	fsopen(SB), 1, $-4	/* ( cstr flags mode -- fd ) */
-	POPQ BX		/* flags */
-	POPQ CX		/* name */
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	PUSHQ AX
-	MOVQ $14, RARG	/* OPEN */
-	TESTQ $512, BX	/* O_CREAT? */
-	JZ .l3
-	MOVQ $22, RARG	/* CREATE */
-.l3:
-	ANDQ $0xF, BX	/* retain only OREAD, OWRITE, ORDWR */
-	PUSHQ BX
-	PUSHQ CX
-	PUSHQ $0	/* dummy retaddr */
-	SYSCALL
-	ADDQ $32, SP
-	POPQ BP
-	POPQ SI
-	NEXT
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-
-TEXT	fsclose(SB), 1, $-4	/* ( fd -- n ) */
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	PUSHQ AX	/* fd */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $4, RARG	/* CLOSE */
-	SYSCALL
-	ADDQ $16, SP	/* removing the pushed parameters */
-	POPQ BP
-	POPQ SI
-	NEXT
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-
-TEXT	fsread(SB), 1, $-4	/* ( a n fd -- n2 ) */
-	POPQ CX		/* size */
-	POPQ DX		/* buf */
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	MOVQ $-1, BX	/* -1LL (seek pos) */
-	PUSHQ BX	/* offset */
-	PUSHQ CX	/* size */
-	PUSHQ DX	/* buf */
-	PUSHQ AX	/* fd */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $50, RARG	/* PREAD */
-	SYSCALL		/* return value in AX */
-	ADDQ $40, SP
-	POPQ BP		/* restore return stack pointer */
-	POPQ SI
-	NEXT
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-
-TEXT	fswrite(SB), 1, $-4	/* ( a n fd -- n2 ) */
-	POPQ CX		/* size */
-	POPQ DX		/* buf */
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	MOVQ $-1, BX	/* -1LL (seek pos) */
-	PUSHQ BX	/* offset */
-	PUSHQ CX	/* size */
-	PUSHQ DX	/* buf */
-	PUSHQ AX	/* fd */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $51, RARG	/* PWRITE */
-	SYSCALL
-	ADDQ $40, SP
-	POPQ BP
-	POPQ SI
-	NEXT
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-	NOP
-
-TEXT	fsseek(SB), 1, $-4	/* ( pos fd -- n ) */
-	POPQ BX		/* offset */
-	PUSHQ SI	/* for some reason, the syscall is changing SI and DI */
-	PUSHQ BP
-	XORQ DX, DX	/* type 0 */
-	PUSHQ DX	/* type 0 */
-	PUSHQ BX	/* offset */
-	PUSHQ AX  	/* fd */
-	PUSHQ $0	/* dummy retaddr */
-	MOVQ $39, RARG	/* SEEK */
-	SYSCALL
-	ADDQ $32, SP	/* remove the pushed parameters */
-	POPQ BP
-	POPQ SI
-	NEXT
-
 TEXT	mmap(SB), 1, $-4	/* ( a1 -- a2 ) */
-	MOVQ $-1, AX	/* unimplemented */
+	MOVQ $-1, TOS; NEXT	/* unimplemented: return -1 instead of falling through into variable */
 
 TEXT	variable(SB), 1, $-4	/* ( -- a ) */
-	PUSHQ AX
-	LEAQ 8(DI), AX
+	PUSH(TOS)
+	LEAQ 8(W), TOS
 	NEXT
 
 TEXT	constant(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	MOVQ 8(DI), AX
+	PUSH(TOS)
+	MOVQ 8(W), TOS
 	NEXT
 
 TEXT	literal(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	MOVQ (SI), AX
-	ADDQ $8, SI
+	PUSH(TOS)
+	MOVQ (IP), TOS
+	ADDQ $8, IP
 	NEXT
 
 TEXT	sliteral(SB), 1, $-4	/* ( -- a n ) */
-	PUSHQ AX
-	XORQ AX,AX
-	MOVB (SI), AL
-	INCQ SI
-	PUSHQ SI
-	ADDQ AX, SI
-	ADDQ $7, SI
-	ANDQ $~7, SI
+	PUSH(TOS)
+	XORQ TOS,TOS
+	MOVB (IP), BL
+	INCQ IP
+	PUSH(IP)
+	ADDQ TOS, IP
+	ADDQ $7, IP
+	ANDQ $~7, IP
 	NEXT
 
 /* puts the top 2 entries of the data stack in the return stack */
 TEXT	doinit(SB), 1, $-4	/* ( hi lo -- ) */
-	MOVQ AX, (BP)
-	POPQ AX
-	MOVQ AX, 8(BP)
-	POPQ AX
-	ADDQ $16, BP
+	MOVQ TOS, (RSP)
+	POP(TOS)
+	MOVQ TOS, 8(RSP)
+	POP(TOS)
+	ADDQ $16, RSP
 	NEXT
 
 /* not sure if this works, needs testing to follow https://github.com/mark4th/x64
@@ -484,130 +347,150 @@
 	when hi > lo, go to the address next to doloop
  */
 TEXT	doloop(SB), 1, $-4
-	INCQ -16(BP)
+	INCQ -16(RSP)
 doloop1:
-	MOVQ -16(BP), BX
-	CMPQ BX, -8(BP)
+	MOVQ -16(RSP), CX
+	CMPQ CX, -8(RSP)
 	JGE .l4
-	MOVQ (SI), SI
+	MOVQ (IP), IP
 	NEXT
 .l4:
-	SUBQ $16, BP
-	ADDQ $8, SI
+	SUBQ $16, RSP
+	ADDQ $8, IP
 	NEXT
 
 TEXT	doploop(SB), 1, $-4	/* ( n -- ) */
-	ADDQ AX, -16(BP)
-	POPQ AX
+	ADDQ TOS, -16(RSP)
+	POP(TOS)
 	JMP doloop1
 
 TEXT	rfetch(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	MOVQ -8(BP), AX
+	PUSH(TOS)
+	MOVQ -8(RSP), TOS
 	NEXT
 
 TEXT	rpush(SB), 1, $-4	/* ( n -- ) */
-	MOVQ AX,(BP)
-	POPQ AX
-	ADDQ $8,BP
+	MOVQ TOS,(RSP)
+	POP(TOS)
+	ADDQ $8,RSP
 	NEXT
 
 TEXT	rpop(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	SUBQ $8, BP
-	MOVQ (BP), AX
+	PUSH(TOS)
+	SUBQ $8, RSP
+	MOVQ (RSP), TOS
 	NEXT
 
 TEXT	i(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	MOVQ -16(BP), AX
+	PUSH(TOS)
+	MOVQ -16(RSP), TOS
 	NEXT
 
 TEXT	j(SB), 1, $-4	/* ( -- n ) */
-	PUSHQ AX
-	MOVQ -32(BP), AX
+	PUSH(TOS)
+	MOVQ -32(RSP), TOS
 	NEXT
 
 TEXT	plus(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	POPQ BX
-	ADDQ BX, AX
+	POP(CX)
+	ADDQ CX, TOS
 	NEXT  
 
 TEXT	minus(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	POPQ BX
-	SUBQ AX, BX
-	MOVQ BX, AX
+	MOVQ TOS, CX
+	POP(TOS)
+	SUBQ CX, TOS
 	NEXT
 
-TEXT	multiply(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	POPQ BX
-	IMULQ BX
+TEXT	multiply(SB), 1, $-4	/* ( n1 n2 -- n1*n2 ) */
+	POP(CX)
+	IMULQ CX,TOS
 	NEXT
 
-TEXT	slashmod(SB), 1, $-4	/* ( n1 n2 -- n3 n4 ) */
-	MOVQ AX, BX
-	MOVQ (SP), AX
-	CDQ
-	IDIVQ BX
-	MOVQ AX, (SP)
+TEXT	slashmod(SB), 1, $-4	/* ( n1 n2 -- remainder quotient ) n1/n2 */
+	MOVQ (PSP), CX /* CX = n1 */
+	PUSHQ DX /* DX == PSP, store DX and AX as they are used by CQO and IDIV */
+	PUSHQ AX
+	/* no need to clear DX: CQO below overwrites it with the sign of AX */
+	MOVQ CX, AX /* AX = n1 */
+	CQO 		/* RAX -> RDX:RAX sign extension (CDQ only extends EAX into EDX) */
+	IDIVQ TOS	/* RDX:RAX / TOS => Quotient in RAX, Remainder in RDX */
+	MOVQ DX, CX	/* CX = remainder */
+	MOVQ AX, TOS /* TOS = quotient */
+	POPQ AX
+	POPQ DX
+	MOVQ CX, (PSP) /* -- remainder quotient */
 	NEXT
 
-TEXT	uslashmod(SB), 1, $-4	/* ( u1 u2 -- u3 u4 ) */
-	MOVQ AX, BX
-	MOVQ (SP), AX
-	XORQ DX, DX
-	DIVQ BX
-	MOVQ DX, (SP)
+/*
+ * uslashmod: unsigned division of u1 by u2.
+ * The dividend is RDX:RAX, so DX is cleared to zero extend u1 and the
+ * unsigned DIVQ is used; the signed IDIVQ would misread large operands.
+ * DX (== PSP) and AX are saved on the hardware stack around the division
+ * because DIVQ leaves the remainder in DX and the quotient in AX.
+ */
+TEXT	uslashmod(SB), 1, $-4	/* ( u1 u2 -- uremainder uquotient ) */
+	MOVQ (PSP), CX /* CX = u1 */
+	PUSHQ DX /* DX == PSP, store DX and AX as they are used by DIV */
+	PUSHQ AX
+	XORQ DX, DX /* DX = 0 */
+	MOVQ CX, AX /* AX = u1 */
+	DIVQ TOS	/* RDX:RAX / TOS => Quotient in RAX, Remainder in RDX */
+	MOVQ DX, CX	/* CX = remainder */
+	MOVQ AX, TOS /* TOS = quotient */
+	POPQ AX
+	POPQ DX
+	MOVQ CX, (PSP) /* -- uremainder uquotient */
+	NEXT
+
 TEXT	binand(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	ANDQ (SP), AX
-	ADDQ $8, SP
+	ANDQ (PSP), TOS
+	ADDQ $8, PSP
 	NEXT
 
 TEXT	binor(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	ORQ (SP), AX
-	ADDQ $8, SP
+	ORQ (PSP), TOS
+	ADDQ $8, PSP
 	NEXT
 
 TEXT	binxor(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	XORQ (SP), AX
-	ADDQ $8, SP
+	XORQ (PSP), TOS
+	ADDQ $8, PSP
 	NEXT
 
 TEXT	xswap(SB), 1, $-4	/* ( x y -- y x ) */
-	XCHGQ AX, (SP)
+	XCHGQ TOS, (PSP)
 	NEXT
 
 TEXT	drop(SB), 1, $-4	/* ( x -- ) */
-	POPQ AX
+	POP(TOS)
 	NEXT
 
 TEXT	dup(SB), 1, $-4	/* ( x -- x x ) */
-	PUSHQ AX
+	PUSH(TOS)
 	NEXT
 
 TEXT	over(SB), 1, $-4	/* ( x y -- x y x ) */
-	PUSHQ AX
-	MOVQ 8(SP), AX
+	PUSH(TOS)
+	MOVQ 8(PSP), TOS
 	NEXT
 
 TEXT	equal(SB), 1, $-4	/* ( x y -- f ) */
-	POPQ BX
-	CMPQ BX, AX
+	POP(CX)
+	CMPQ CX, TOS
 	JEQ .true
-	XORQ AX, AX
+	XORQ TOS, TOS
 	NEXT
 TEXT	true(SB), 1, $-4
 .true:
-	MOVQ $-1, AX
+	MOVQ $-1, TOS
 	NEXT
 	
 TEXT	greater(SB), 1, $-4	/* ( x y -- f ) */
-	POPQ BX
-	CMPQ BX, AX
+	POP(CX)
+	CMPQ CX, TOS
 	JGT .true
-	XORQ AX, AX
+	XORQ TOS, TOS
 	NEXT
 
 /* if x < y then y - x > 0, no sign flag
@@ -616,82 +499,82 @@
 	compare x and y == CMP x, y
  */
 TEXT	less(SB), 1, $-4	/* ( x y -- f ) */
-	POPQ BX
-	CMPQ BX, AX
+	POP(CX)
+	CMPQ CX, TOS
 	JLT .true
-	XORQ AX, AX
+	XORQ TOS, TOS
 	NEXT
 
 TEXT	stackptr(SB), 1, $-4	/* ( -- a ) does not include TOS! */
-	PUSHQ AX
-	MOVQ SP, AX
+	PUSH(TOS)
+	MOVQ PSP, TOS
 	NEXT
 
 TEXT	lshift(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	MOVQ AX, CX
-	POPQ AX
-	SHLQ CL, AX
+	MOVQ TOS, CX
+	POP(TOS)
+	SHLQ CL, TOS
 	NEXT
 	
 TEXT	rshift(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	MOVQ AX, CX
-	POPQ AX
-	SHRQ CL, AX
+	MOVQ TOS, CX
+	POP(TOS)
+	SHRQ CL, TOS
 	NEXT
 
 TEXT	rshifta(SB), 1, $-4	/* ( n1 n2 -- n ) */
-	MOVQ AX, CX
-	POPQ AX
-	SARQ CL, AX
+	MOVQ TOS, CX
+	POP(TOS)
+	SARQ CL, TOS
 	NEXT
 
 TEXT	execute(SB), 1, $-4	/* ( ... a -- ... ) */
-	MOVQ AX, DI
-	POPQ AX
-	MOVQ (DI), BX
-	JMP BX
+	MOVQ TOS, W
+	POP(TOS)
+	MOVQ (W), CX
+	JMP CX
 
 TEXT	deferred(SB), 1, $-4
-	MOVQ 8(DI), DI
-	MOVQ (DI), BX
-	JMP BX
+	MOVQ 8(W), W
+	MOVQ (W), CX
+	JMP CX
 
 TEXT	unloop(SB), 1, $-4
-	SUBQ $16, BP
+	SUBQ $16, RSP
 	NEXT
 
 TEXT	cmove(SB), 1, $-4	/* ( a1 a2 n -- ) */
-	MOVQ AX, CX
-	POPQ DI
-	MOVQ SI, BX
-	POPQ SI
+	MOVQ TOS, CX	/* CX = n, the byte count consumed by REP */
+	POP(DI)		/* DI = a2, the destination used by MOVSB */
+	POP(SI)		/* SI = a1, the source used by MOVSB */
+	/* SI and DI are temporary registers now, so there is nothing to save */
 	REP; MOVSB
-	MOVQ BX, SI
-	POPQ AX
+	/* IP (R9) and W (R10) are not used by the copy */
+	POP(TOS)	/* new top of stack */
 	NEXT
 
 TEXT	cmoveb(SB), 1, $-4	/* ( a1 a2 n -- ) */
-	MOVQ AX, CX
-	POPQ DI
-	DECQ AX
-	ADDQ AX, DI
-	MOVQ SI, BX
-	POPQ SI
-	ADDQ AX, SI
+	MOVQ TOS, CX	/* CX = n, the byte count consumed by REP */
+	POP(DI)		/* DI = a2, the destination used by MOVSB */
+	DECQ TOS	/* TOS = n - 1, offset of the last byte */
+	ADDQ TOS, DI	/* DI = last byte of the destination */
+	POP(SI)		/* SI = a1, the source used by MOVSB */
+	ADDQ TOS, SI	/* SI = last byte of the source */
+	/* SI and DI are temporary registers now, so there is nothing to save */
 	STD
 	REP; MOVSB
 	CLD
-	MOVQ BX, SI
-	POPQ AX
+	/* IP (R9) and W (R10) are not used by the copy */
+	POP(TOS)	/* new top of stack */
 	NEXT
 
 TEXT	cas(SB), 1, $-4	/* ( a old new -- f ) */
-	MOVQ AX, CX	/* new */
-	POPQ AX	/* old */
-	POPQ BX	/* addr */
-	LOCK; CMPXCHGQ CX, (BX)
+	MOVQ TOS, DI	/* new */
+	POP(TOS)	/* old */
+	POP(SI)	/* addr */
+	LOCK; CMPXCHGQ DI, (SI)	/* NOTE(review): CMPXCHG compares the memory operand with AX, but old is now in TOS (BX) - confirm AX is loaded with old */
 	JE .true
-	XORQ AX, AX
+	XORQ TOS, TOS	/* compare failed: f = 0 */
 	/* pause -- no equivalent in 6a ? */
 	NEXT
 
@@ -707,7 +590,7 @@
 GLOBL	name(SB), $14
 
 DATA	errstrbuffer(SB)/1, $0
-GLOBL	errstrbuffer(SB), $128	/* matches ERRMAX of libc.h */
+GLOBL	errstrbuffer(SB), $128	/* matches ERRMAX of libc.h */
 
 DATA	tibuffer(SB)/1, $0
 GLOBL	tibuffer(SB), $1024
@@ -716,7 +599,7 @@
 /* TODO there should not be a heap limit, get rid of this */
 /*
 DATA	heap(SB)/1, $0
-GLOBL	heap(SB), $HEAP_SIZE
+GLOBL	heap(SB), $HEAP_SIZE
 */