ref: 9e77fa393815e9c1413533949ff6dd53b5a3d91f
dir: /os/pc64/forth.s/
#include "mem.h" /* Goal is to replace the dis vm with forth forth kernel, amd64 9front variant Register usage: Original usage TOS: AX top of stack register SP: SP parameter stack pointer, grows towards lower memory (downwards) RP: BP (= RARG) return stack pointer, grows towards higher memory (upwards) AP: SI address pointer W: DI work register (holds CFA) BX, CX, DX, R8-R15 temporary registers plan9 amd64 assembler puts the first argument in BP (RARG), return value in AX. Changed to Leaving AX, SP, BP (RARG), R14 (up Proc), R15 (m Mach) alone to not mess with the C environment TOP: BX top of stack register PSP: DX parameter stack pointer, grows towards lower memory (downwards) RSP: R8 return stack pointer, grows towards higher memory (upwards) IP: R9 instruction pointer W: R10 work register (holds CFA) UP: R11 register holding the start of this process's heap memory UPE: R12 register holding the end of this process's heap memory -- TODO, use this CX, SI, DI, R13 temporary registers coding standard : <name> (S input-stack --- output-stack) (R --- ) (G descriptive glossary text) f1 f2 ( interim stack picture) \ programmers explanatory comment .. fn ; Heap memory map: uses n+ pages at the start not bothering with increase by *2 when filled up, for now check mem.h for the memory map Keeps static code/data at the start and variable data to the end For performance reasons, if you are writing to a cache line with code in it, I think there is a performance penalty. Because x86 has a separate instruction and data cache in L1, but is kind enough to abstract this away. So you can write over executing code, but it causes some synchronisation that might be expensive. And I would expect this to be done on a cache-line granularity. So you could end up with code running slower just because it happens to be defined after space for some data that's in active use... not good - veltas on #forth high memory word buffer 512 bytes error string buffer 128 bytes latest dictionary entry, Dtop need this as the system definitions and user definitions are not continuous dictionary pointer, Dp, Dtop, Args forth stack pointer, forthpsp forth variables Return stack 1 page (4096 bytes, BY2PG, 512 entries) at FFSTART | | v (grows downwards) ^ (grows upwards) | | tib, text input buffer 1024 bytes (until the next page?) Parameter stack 1 page (BY2PG, 512 entries) at FFEND-4096 | | v (grows downwards) Pad is 256 bytes from here ^ (grows upwards) | | User dictionary upto n pages from the start UPE: heap end UP: heap start forth constants low memory TODO Move variable space out of the dictionary from #forth 11:31 < joe9> In x86 you want to keep the code in a different section than variables -- why? 11:31 < joe9> in my port, I am keeping them together. 11:32 < joe9> it gets messy with different sections. */ #define TOP BX /* top of stack register */ #define PSP DX /* parameter stack pointer, grows towards lower memory (downwards) */ #define RSP R8 /* return stack pointer, grows towards higher memory (upwards) */ #define IP R9 /* instruction pointer */ #define W R10/* work register (holds CFA) */ #define UP R11/* start of heap memory */ #define UPE R12/* end of heap memory */ #define PSTACK_SIZE BY2PG #define RSTACK_SIZE BY2PG /* putting this above the asm code as the v_dp define is needed by _main */ /* m_ for primitive/macro word cfa mc_ for primtive/macro word constants c_ for colon word cfa ci_ for colon immediate word cfa v_ for colon variable word cfa CONSTANTS - capital letters Variables - initial capital case words - lower case */ /* HEAPSTART, HEAPEND, HERE, DTOP, VHERE are loaded by the caller */ TEXT forthmain(SB), 1, $-4 /* no stack storage required */ /* Argument has the start of heap */ MOVQ RARG, UP /* start of heap memory */ MOVQ 8(UP), UPE /* HEAPEND populated by the caller */ MOVQ UP, RSP ADDQ $RSTACK, RSP /* return stack pointer, reset */ MOVQ UP, PSP ADDQ $PSTACK, PSP /* parameter stack pointer - stack setup, clear */ /* execute boot */ MOVQ UP, CX ADDQ $DTOP, CX /* address of last defined word (c_boot) is at DTOP */ MOVQ (CX), IP /* IP = address of c_boot */ ADDQ $24, IP /* to get to the parameter field address of boot word */ /* lodsl could make this simpler. But, this is more comprehensible why not JMP* (W)? Address 0 8 16 aword : docol 40 ... Address 40 48 bword : docol 72 Address 72 80 cword : docol .... at docol address, some assembly instruction Assume IP = 8 */ #define NEXT MOVQ (IP), W; /* W = 40, contents of address in IP, some word's code field address */ \ MOVQ (W), CX; /* TOP = docol, Get the address in the address in IP = code field address */ \ ADDQ $8, IP; /* move IP further, IP = 16 */ \ JMP* CX; /* Start executing at docol address, JMP* = jump to a non-relative address */ #define PUSH(r) SUBQ $8, PSP; \ MOVQ r, (PSP); #define POP(r) MOVQ (PSP), r; \ ADDQ $8, PSP; #define RPUSH(r) SUBQ $8, RSP; \ MOVQ r, (RSP); #define RPOP(r) MOVQ (RSP), r; \ ADDQ $8, RSP; NEXT TEXT reset(SB), 1, $-4 MOVQ UP, RSP ADDQ $RSTACK, RSP NEXT TEXT clear(SB), 1, $-4 MOVQ UP, PSP ADDQ $PSTACK, PSP NEXT TEXT colon(SB), 1, $-4 RPUSH(IP) LEAQ 8(W), IP NEXT TEXT exitcolon(SB), 1, $-4 RPOP(IP) NEXT TEXT dodoes(SB), 1, $-4 /* ( -- a ) */ RPUSH(IP) MOVQ 8(W),IP PUSH(TOP) LEAQ 16(W), TOP NEXT TEXT jump(SB), 1, $-4 /* ( -- ) */ MOVQ (IP),IP NEXT TEXT execute(SB), 1, $-4 /* ( ... a -- ... ) */ PUSH(TOP) CALL validateaddress(SB) /* a a -- a */ MOVQ TOP, W POP(TOP) MOVQ (W), CX JMP* CX TEXT deferred(SB), 1, $-4 MOVQ 8(W), W MOVQ (W), CX JMP* CX /* ( f -- ) cjump address if true, skip the address and continue else, go to the address */ TEXT cjump(SB), 1, $-4 /* ( f -- ) */ MOVQ (IP), CX /* get the next address */ ADDQ $8, IP /* move esi beyond that */ TESTQ TOP, TOP JNZ .l1 /* if true, move along */ MOVQ CX, IP /* if false, go to the above address */ .l1: POP(TOP) NEXT TEXT fetch(SB), 1, $-4 /* ( a -- n) */ PUSH(TOP) CALL validateaddress(SB) /* a a -- a */ MOVQ (TOP), TOP NEXT TEXT store(SB), 1, $-4 /* ( n a -- ) */ PUSH(TOP) CALL validateaddress(SB) /* a a -- a */ POP(CX) MOVQ CX, (TOP) POP(TOP) NEXT TEXT cfetch(SB), 1, $-4 /* ( a -- c ) */ PUSH(TOP) CALL validateaddress(SB) /* a a -- a */ XORQ CX, CX MOVB (TOP), CL MOVQ CX, TOP NEXT TEXT cstore(SB), 1, $-4 /* ( c a -- ) */ PUSH(TOP) CALL validateaddress(SB) /* c a a -- c a */ POP(CX) MOVB CL, (TOP) POP(TOP) NEXT TEXT terminate(SB), 1, $-4 /* ( n -- ) */ MOVQ TOP, AX RET TEXT fthdump(SB), 1, $-4 /* ( n -- ) */ INT $0 CALL dumpstack(SB) RET #include "bindings.s" TEXT mmap(SB), 1, $-4 /* ( a1 -- a2 ) */ MOVQ $-1, TOP /* unimplemented */ /* * Traditionally, the pfa of variable has the value. * But, to keep the populated dictionary read-only, we * moved the variables to the variables area. * Now, pfa holds the address where the value is stored * instead of the actual value. * With this change, there is no difference in how the * costant cfa and variable cfa work. They both load the * value in the pfa to the top of the stack. The only * difference is that the value in the pfa is an address * for the variable and the actual value for the constant. */ TEXT variable(SB), 1, $-4 /* ( -- a ) */ PUSH(TOP) MOVQ 8(W), TOP /* LEAQ 8(W), TOP */ NEXT TEXT constant(SB), 1, $-4 /* ( -- n ) */ PUSH(TOP) MOVQ 8(W), TOP NEXT TEXT literal(SB), 1, $-4 /* ( -- n ) */ PUSH(TOP) MOVQ (IP), TOP ADDQ $8, IP NEXT /* ( -- a n) and set IP = aligned(IP+count) */ TEXT sliteral(SB), 1, $-4 /* ( -- a n ) */ PUSH(TOP) XORQ TOP,TOP MOVB (IP), BL /* move the count from (IP) to BL (lower byte of TOP) */ INCQ IP PUSH(IP) /* push IP onto the stack */ ADDQ TOP, IP ADDQ $7, IP ANDQ $~7, IP /* set IP += count, then align the IP to the next cell */ NEXT /* puts the top 2 entries of the data stack in the return stack ( limit index -- ) (R -- index limit ) */ TEXT doinit(SB), 1, $-4 /* ( hi lo -- ) (R -- lo hi */ RPUSH(TOP) POP(TOP) RPUSH(TOP) POP(TOP) NEXT TEXT unloop(SB), 1, $-4 ADDQ $16, RSP NEXT /* needs testing to follow https://github.com/mark4th/x64 check the notes return stack would have current index end index (R index limit -- ) increment index when limit > index, go to the address next to doloop */ TEXT doloop(SB), 1, $-4 INCQ 8(RSP) doloop1: MOVQ 8(RSP), CX CMPQ CX, 0(RSP) JGE .l4 MOVQ (IP), IP NEXT .l4: ADDQ $16, RSP ADDQ $8, IP NEXT TEXT doploop(SB), 1, $-4 /* ( n -- ) */ ADDQ TOP, 16(RSP) POP(TOP) JMP doloop1 TEXT rfetch(SB), 1, $-4 /* ( -- n ) no change in RSP */ PUSH(TOP) MOVQ (RSP), TOP NEXT TEXT rpop(SB), 1, $-4 /* ( -- n ) (R n -- )*/ PUSH(TOP) RPOP(TOP) NEXT TEXT rpush(SB), 1, $-4 /* ( n -- ) (R -- n ) */ RPUSH(TOP) POP(TOP) NEXT TEXT i(SB), 1, $-4 /* ( -- index ) (R index limit -- index limit ) */ PUSH(TOP) MOVQ 8(RSP), TOP NEXT /* in nested do loops, j is the outer loop's index */ TEXT j(SB), 1, $-4 /* ( -- index1 ) (R index1 limit1 index2 limit2 -- index1 limit1 index2 limit2 ) */ PUSH(TOP) MOVQ 24(RSP), TOP NEXT TEXT plus(SB), 1, $-4 /* ( n1 n2 -- n ) */ POP(CX) ADDQ CX, TOP NEXT TEXT minus(SB), 1, $-4 /* ( n1 n2 -- n ) */ MOVQ TOP, CX POP(TOP) SUBQ CX, TOP NEXT TEXT multiply(SB), 1, $-4 /* ( n1 n2 -- n1*n2 ) */ POP(CX) IMULQ CX,TOP NEXT /* TODO *\/ for the quotient ( *\/ nip ) *\/MOD for the remainder and quotient */ TEXT multiplyslashmod(SB), 1, $-4 /* ( n1 n2 n3 -- remainder((n1*n2)/n3) quotient((n1*n2)/n3) */ POP(CX) IMULQ CX,TOP NEXT /* / for only the quotient /mod for the remainder and quotient mod for the remainder */ TEXT slashmod(SB), 1, $-4 /* ( n1 n2 -- remainder quotient ) n1/n2 */ MOVQ (PSP), CX /* CX = n1 */ PUSHQ DX /* DX == PSP, store DX and AX as they are used by CDQ and IDIV */ PUSHQ AX XORQ DX, DX /* DX = 0 */ MOVQ CX, AX /* AX = n1 */ CDQ /* RAX -> RDX:RAX sign extension */ IDIVQ TOP /* RDX:RAX / TOP => Quotient in RAX, Remainder in RDX */ MOVQ DX, CX /* CX = remainder */ MOVQ AX, TOP /* TOP = quotient */ POPQ AX POPQ DX MOVQ CX, (PSP) /* -- remainder quotient */ NEXT TEXT uslashmod(SB), 1, $-4 /* ( u1 u2 -- uremainder uquotient ) */ MOVQ (PSP), CX /* CX = n1 */ PUSHQ DX /* DX == PSP, store DX and AX as they are used by CDQ and IDIV */ PUSHQ AX XORQ DX, DX /* DX = 0 */ MOVQ CX, AX /* AX = n1 */ IDIVQ TOP /* RDX:RAX / TOP => Quotient in RAX, Remainder in RDX */ MOVQ DX, CX /* CX = remainder */ MOVQ AX, TOP /* TOP = quotient */ POPQ AX POPQ DX MOVQ CX, (PSP) /* -- uremainder uquotient */ NEXT MOVQ TOP, TOP MOVQ (PSP), TOP XORQ PSP, PSP DIVQ TOP MOVQ PSP, (PSP) NEXT TEXT binand(SB), 1, $-4 /* ( n1 n2 -- n ) */ ANDQ (PSP), TOP ADDQ $8, PSP NEXT TEXT binor(SB), 1, $-4 /* ( n1 n2 -- n ) */ ORQ (PSP), TOP ADDQ $8, PSP NEXT TEXT binxor(SB), 1, $-4 /* ( n1 n2 -- n ) */ XORQ (PSP), TOP ADDQ $8, PSP NEXT TEXT xswap(SB), 1, $-4 /* ( x y -- y x ) */ XCHGQ TOP, (PSP) NEXT TEXT drop(SB), 1, $-4 /* ( x -- ) */ POP(TOP) NEXT TEXT dup(SB), 1, $-4 /* ( x -- x x ) */ PUSH(TOP) NEXT TEXT over(SB), 1, $-4 /* ( x y -- x y x ) */ PUSH(TOP) MOVQ 8(PSP), TOP NEXT TEXT equal(SB), 1, $-4 /* ( x y -- f ) */ POP(CX) CMPQ CX, TOP JEQ .true XORQ TOP, TOP NEXT TEXT true(SB), 1, $-4 .true: MOVQ $-1, TOP NEXT TEXT greater(SB), 1, $-4 /* ( x y -- f ) */ POP(CX) CMPQ CX, TOP JGT .true XORQ TOP, TOP NEXT /* if x < y then y - x > 0, no sign flag intel manual says destination operand - source operand 9front comparision operators make the usage more english-like For, CMP x,y; JGT greater can be read as compare x and y; if x > y, goto greater check tests/cmp.s for tests */ TEXT less(SB), 1, $-4 /* ( x y -- f ) */ POP(CX) CMPQ CX, TOP JLT .true XORQ TOP, TOP NEXT /* Return the address of the top of the stack, just before sp@ was executed. 1 2 S0 s@ - hex cr . . . cr gives 18 2 1, so S@ would have been pointing at S0 18 in hex translates to 3 64-bit cells */ TEXT stackptr(SB), 1, $-4 /* ( -- a ) does not include TOP! */ MOVQ PSP, CX PUSH(TOP) MOVQ CX, TOP NEXT TEXT lshift(SB), 1, $-4 /* ( n1 n2 -- n1<<n2 ) */ MOVQ TOP, CX POP(TOP) SHLQ CL, TOP NEXT TEXT rshift(SB), 1, $-4 /* ( n1 n2 -- n1>>n2 ) */ MOVQ TOP, CX POP(TOP) SHRQ CL, TOP NEXT TEXT rshifta(SB), 1, $-4 /* ( n1 n2 -- n ) */ MOVQ TOP, CX POP(TOP) SARQ CL, TOP NEXT /* moves n bytes from a1 to a2 */ TEXT cmove(SB), 1, $-4 /* ( a1 a2 n -- ) */ PUSH(TOP) MOVQ 8(PSP), CX /* a2 */ MOVQ CX, TOP CALL validateaddress(SB) /* a1 a2 n a2 -- a1 a2 n */ PUSH(TOP) MOVQ 16(PSP), CX /* a1 */ MOVQ CX, TOP CALL validateaddress(SB) /* a1 a2 n a1 -- a1 a2 n */ POP(DI) POP(SI) MOVQ TOP, CX POP(TOP) /* SI = a1, DI = a2, CX = n */ REP; MOVSB NEXT /* moves n bytes from a1+n-1 to a2+n-1 until n = 0 */ TEXT cmoveb(SB), 1, $-4 /* ( a1 a2 n -- ) */ PUSH(TOP) MOVQ 8(PSP), CX /* a2 */ MOVQ CX, TOP CALL validateaddress(SB) /* a1 a2 n a2 -- a1 a2 n */ PUSH(TOP) MOVQ 16(PSP), CX /* a1 */ MOVQ CX, TOP CALL validateaddress(SB) /* a1 a2 n a1 -- a1 a2 n */ MOVQ TOP, CX DECQ TOP /* TOP = n-1, CX = n */ POP(DI) ADDQ TOP, DI POP(SI) ADDQ TOP, SI POP(TOP) /* CX = n, SI = a1+n-1, DI = a2+n-1 */ STD REP; MOVSB CLD NEXT TEXT cas(SB), 1, $-4 /* ( a old new -- f ) */ MOVQ TOP, DI /* new */ POP(TOP) /* old */ POP(SI) /* addr */ LOCK; CMPXCHGQ DI, (SI) JE .true XORQ TOP, TOP /* pause -- no equivalent in 6a ? */ NEXT /* static locations */ TEXT S0(SB), 1, $-4 /* S0 needs a calculation to come up with the value */ PUSH(TOP) MOVQ UP, TOP ADDQ $PSTACK, TOP NEXT TEXT Wordb(SB), 1, $-4 /* WORDB location */ PUSH(TOP) MOVQ UP, TOP ADDQ $WORDB, TOP NEXT TEXT Tib(SB), 1, $-4 /* TIB location */ PUSH(TOP) MOVQ UP, TOP ADDQ $TIB, TOP NEXT TEXT Dp(SB), 1, $-4 /* S0 needs a calculation to come up with the value */ PUSH(TOP) MOVQ UP, TOP ADDQ $HERE, TOP NEXT TEXT Dtop(SB), 1, $-4 /* S0 needs a calculation to come up with the value */ PUSH(TOP) MOVQ UP, TOP ADDQ $DTOP, TOP NEXT /* Dp for the variable space */ TEXT Vp(SB), 1, $-4 /* S0 needs a calculation to come up with the value */ PUSH(TOP) MOVQ UP, TOP ADDQ $VHERE, TOP NEXT TEXT Args(SB), 1, $-4 PUSH(TOP) MOVQ UP, TOP ADDQ $ARGS, TOP NEXT /* * variables used by the core words. Using variable code word instead of known locations. #define VARIABLE(name, location) TEXT name(SB), 1, $-4 ;\ PUSH(TOP); \ MOVQ UP, TOP ;\ ADDQ location, TOP ;\ NEXT; VARIABLE(Tib, $TIB) */ /* * routines called by forth asm macros or bindings */ /* callable by forth primitives to check address ( a -- -1|0|1 ) argument 1 in TOP = address return value in TOP -1 0 1 if UP < address < UPE return 0 within range else if address < UP return -1 below UP else if UPE < address return 1 above UP */ TEXT inup(SB), 1, $-4 CMPQ TOP, UPE JGT aboveupe /* a > UPE */ CMPQ TOP, UP JLT belowup /* a < UP */ MOVQ $0, TOP /* could use XORQ TOP, TOP to zero too */ RET belowup: MOVQ $-1, TOP RET aboveupe: MOVQ $1, TOP RET /* callable by forth primitives to check address ( n a -- -1|0|1 ) argument 1 in TOP = address return value in TOP -1 0 1 if UP < address && address+n < UPE return 0 within range else if address < UP return -1 below UP else if UPE < address+n return 1 above UP */ TEXT bufinup(SB), 1, $-4 POP(CX) CMPQ CX, $0 /* negative n? */ JLT belowup /* TODO have an appropriate error message */ ADDQ TOP, CX CMPQ CX, UPE /* a+n, UPE */ JGT aboveupe /* a+n > UPE */ CMPQ TOP, UP JLT belowup /* a < UP */ MOVQ $0, TOP RET invalidaddress: /* TODO need error reporting here */ INT $0x0D /* general protection error */ RET TEXT validateaddress(SB), 1, $0 /* a -- */ CALL inup(SB) MOVQ TOP, CX POP(TOP) CMPQ CX, $0 JNE invalidaddress RET TEXT validatebuffer(SB), 1, $0 /* n a -- */ CALL bufinup(SB) MOVQ TOP, CX POP(TOP) CMPQ CX, $0 JNE invalidaddress RET TEXT forthend(SB), 1, $-4 END