ref: feb88f048d3ebfa1ff5e2b8e61dc3ad0baacffbc
dir: /appl/lib/readjpg.b/
implement RImagefile; include "sys.m"; sys: Sys; include "draw.m"; include "bufio.m"; bufio: Bufio; Iobuf: import bufio; include "imagefile.m"; # Constants, all preceded by byte 16rFF SOF: con byte 16rC0; # Start of Frame SOF2: con byte 16rC2; # Start of Frame; progressive Huffman JPG: con byte 16rC8; # Reserved for JPEG extensions DHT: con byte 16rC4; # Define Huffman Tables DAC: con byte 16rCC; # Arithmetic coding conditioning RST: con byte 16rD0; # Restart interval termination RST7: con byte 16rD7; # Restart interval termination (highest value) SOI: con byte 16rD8; # Start of Image EOI: con byte 16rD9; # End of Image SOS: con byte 16rDA; # Start of Scan DQT: con byte 16rDB; # Define quantization tables DNL: con byte 16rDC; # Define number of lines DRI: con byte 16rDD; # Define restart interval DHP: con byte 16rDE; # Define hierarchical progression EXP: con byte 16rDF; # Expand reference components APPn: con byte 16rE0; # Reserved for application segments JPGn: con byte 16rF0; # Reserved for JPEG extensions COM: con byte 16rFE; # Comment Header: adt { fd: ref Iobuf; ch: chan of (ref Rawimage, string); # variables in i/o routines sr: int; # shift register, right aligned cnt: int; # # bits in right part of sr buf: array of byte; bufi: int; nbuf: int; Nf: int; comp: array of Framecomp; mode: byte; X,Y: int; qt: array of array of int; # quantization tables dcht: array of ref Huffman; acht: array of ref Huffman; sf: array of byte; # start of frame; do better later ss: array of byte; # start of scan; do better later ri: int; }; NBUF: con 16*1024; Huffman: adt { bits: array of int; size: array of int; code: array of int; val: array of int; mincode: array of int; maxcode: array of int; valptr: array of int; # fast lookup value: array of int; shift: array of int; }; Framecomp: adt # Frame component specifier from SOF marker { C: int; H: int; V: int; Tq: int; }; zerobytes: array of byte; zeroints: array of int; zeroreals: array of real; clamp: array of byte; NCLAMP: con 1000; CLAMPOFF: con 300; init(iomod: Bufio) { if(sys == nil) sys = load Sys Sys->PATH; bufio = iomod; zerobytes = array[8*8] of byte; zeroints = array[8*8] of int; zeroreals = array[8*8] of real; for(k:=0; k<8*8; k++){ zerobytes[k] = byte 0; zeroints[k] = 0; zeroreals[k] = 0.0; } clamp = array[NCLAMP] of byte; for(k=0; k<CLAMPOFF; k++) clamp[k] = byte 0; for(; k<CLAMPOFF+256; k++) clamp[k] = byte(k-CLAMPOFF); for(; k<NCLAMP; k++) clamp[k] = byte 255; } read(fd: ref Iobuf): (ref Rawimage, string) { # spawn a subprocess so I/O errors can clean up easily ch := chan of (ref Rawimage, string); spawn readslave(fd, ch); return <-ch; } readmulti(fd: ref Iobuf): (array of ref Rawimage, string) { (i, err) := read(fd); if(i != nil){ a := array[1] of { i }; return (a, err); } return (nil, err); } readslave(fd: ref Iobuf, ch: chan of (ref Rawimage, string)) { image: ref Rawimage; (header, err) := soiheader(fd, ch); if(header == nil){ ch <-= (nil, err); exit; } buf := header.buf; nseg := 0; Loop: while(err == ""){ m: int; b: array of byte; nseg++; (m, b, err) = readsegment(header); case m{ -1 => break Loop; int APPn+0 => if(nseg==1 && string b[0:4]=="JFIF"){ # JFIF header; check version vers0 := int b[5]; vers1 := int b[6]; if(vers0>1 || vers1>2) err = sys->sprint("ReadJPG: can't handle JFIF version %d.%2d", vers0, vers1); } int APPn+1 to int APPn+15 => ; int DQT => err = quanttables(header, b); int SOF => header.Y = int2(b, 1); header.X = int2(b, 3); header.Nf = int b[5]; header.comp = array[header.Nf] of Framecomp; for(i:=0; i<header.Nf; i++){ header.comp[i].C = int b[6+3*i+0]; (H, V) := nibbles(b[6+3*i+1]); header.comp[i].H = H; header.comp[i].V = V; header.comp[i].Tq = int b[6+3*i+2]; } header.mode = SOF; header.sf = b; int SOF2 => err = sys->sprint("ReadJPG: can't handle progressive Huffman mode"); break Loop; int SOS => header.ss = b; (image, err) = decodescan(header); if(err != "") break Loop; # BUG: THIS SHOULD USE THE LOOP TO FINISH UP x := nextbyte(header, 1); if(x != 16rFF) err = sys->sprint("ReadJPG: didn't see marker at end of scan; saw %x", x); else{ x = nextbyte(header, 1); if(x != int EOI) err = sys->sprint("ReadJPG: expected EOI saw %x", x); } break Loop; int DHT => err = huffmantables(header, b); int DRI => header.ri = int2(b, 0); int COM => ; int EOI => break Loop; * => err = sys->sprint("ReadJPG: unknown marker %.2x", m); } } ch <-= (image, err); } readerror(): string { return sys->sprint("ReadJPG: read error: %r"); } marker(buf: array of byte, n: int): byte { if(buf[n] != byte 16rFF) return byte 0; return buf[n+1]; } int2(buf: array of byte, n: int): int { return (int buf[n]<<8)+(int buf[n+1]); } nibbles(b: byte): (int, int) { i := int b; return (i>>4, i&15); } soiheader(fd: ref Iobuf, ch: chan of (ref Rawimage, string)): (ref Header, string) { # 1+ for restart preamble (see nextbyte), +1 for sentinel buf := array[1+NBUF+1] of byte; if(fd.read(buf, 2) != 2) return (nil, sys->sprint("ReadJPG: can't read header: %r")); if(marker(buf, 0) != SOI) return (nil, sys->sprint("ReadJPG: unrecognized marker in header")); h := ref Header; h.buf = buf; h.bufi = 0; h.nbuf = 0; h.fd = fd; h.ri = 0; h.ch = ch; return (h, nil); } readsegment(h: ref Header): (int, array of byte, string) { if(h.fd.read(h.buf, 2) != 2) return (-1, nil, readerror()); m := int marker(h.buf, 0); case m{ int EOI => return (m, nil, nil); 0 => err := sys->sprint("ReadJPG: expecting marker; saw %.2x%.2x)", int h.buf[0], int h.buf[1]); return (-1, nil, err); } if(h.fd.read(h.buf, 2) != 2) return (-1, nil, readerror()); n := int2(h.buf, 0); if(n < 2) return (-1, nil, readerror()); n -= 2; # if(n > len h.buf){ # h.buf = array[n+1] of byte; # +1 for sentinel # #h.nbuf = n; # } b := array[n] of byte; if(h.fd.read(b, n) != n) return (-1, nil, readerror()); return (m, b, nil); } huffmantables(h: ref Header, b: array of byte): string { if(h.dcht == nil){ h.dcht = array[4] of ref Huffman; h.acht = array[4] of ref Huffman; } err: string; mt: int; for(l:=0; l<len b; l+=17+mt){ (mt, err) = huffmantable(h, b[l:]); if(err != nil) return err; } return nil; } huffmantable(h: ref Header, b: array of byte): (int, string) { t := ref Huffman; (Tc, th) := nibbles(b[0]); if(Tc > 1) return (0, sys->sprint("ReadJPG: unknown Huffman table class %d", Tc)); if(th>3 || (h.mode==SOF && th>1)) return (0, sys->sprint("ReadJPG: unknown Huffman table index %d", th)); if(Tc == 0) h.dcht[th] = t; else h.acht[th] = t; # flow chart C-2 nsize := 0; for(i:=0; i<16; i++) nsize += int b[1+i]; t.size = array[nsize+1] of int; k := 0; for(i=1; i<=16; i++){ n := int b[i]; for(j:=0; j<n; j++) t.size[k++] = i; } t.size[k] = 0; # initialize HUFFVAL t.val = array[nsize] of int; for(i=0; i<nsize; i++){ t.val[i] = int b[17+i]; } # flow chart C-3 t.code = array[nsize+1] of int; k = 0; code := 0; si := t.size[0]; for(;;){ do t.code[k++] = code++; while(t.size[k] == si); if(t.size[k] == 0) break; do{ code <<= 1; si++; }while(t.size[k] != si); } # flow chart F-25 t.mincode = array[17] of int; t.maxcode = array[17] of int; t.valptr = array[17] of int; i = 0; j := 0; F25: for(;;){ for(;;){ i++; if(i > 16) break F25; if(int b[i] != 0) break; t.maxcode[i] = -1; } t.valptr[i] = j; t.mincode[i] = t.code[j]; j += int b[i]-1; t.maxcode[i] = t.code[j]; j++; } # create byte-indexed fast path tables t.value = array[256] of int; t.shift = array[256] of int; maxcode := t.maxcode; # stupid startup algorithm: just run machine for each byte value Bytes: for(v:=0; v<256; v++){ cnt := 7; m := 1<<7; code = 0; sr := v; i = 1; for(;;i++){ if(sr & m) code |= 1; if(code <= maxcode[i]) break; code <<= 1; m >>= 1; if(m == 0){ t.shift[v] = 0; t.value[v] = -1; continue Bytes; } cnt--; } t.shift[v] = 8-cnt; t.value[v] = t.val[t.valptr[i]+(code-t.mincode[i])]; } return (nsize, nil); } quanttables(h: ref Header, b: array of byte): string { if(h.qt == nil) h.qt = array[4] of array of int; err: string; n: int; for(l:=0; l<len b; l+=1+n){ (n, err) = quanttable(h, b[l:]); if(err != nil) return err; } return nil; } quanttable(h: ref Header, b: array of byte): (int, string) { (pq, tq) := nibbles(b[0]); if(pq > 1) return (0, sys->sprint("ReadJPG: unknown quantization table class %d", pq)); if(tq > 3) return (0, sys->sprint("ReadJPG: unknown quantization table index %d", tq)); q := array[64] of int; h.qt[tq] = q; for(i:=0; i<64; i++){ if(pq == 0) q[i] = int b[1+i]; else q[i] = int2(b, 1+2*i); } return (64*(1+pq), nil); } zig := array[64] of { 0, 1, 8, 16, 9, 2, 3, 10, 17, # 0-7 24, 32, 25, 18, 11, 4, 5, # 8-15 12, 19, 26, 33, 40, 48, 41, 34, # 16-23 27, 20, 13, 6, 7, 14, 21, 28, # 24-31 35, 42, 49, 56, 57, 50, 43, 36, # 32-39 29, 22, 15, 23, 30, 37, 44, 51, # 40-47 58, 59, 52, 45, 38, 31, 39, 46, # 48-55 53, 60, 61, 54, 47, 55, 62, 63 # 56-63 }; decodescan(h: ref Header): (ref Rawimage, string) { ss := h.ss; Ns := int ss[0]; if((Ns!=3 && Ns!=1) || Ns!=h.Nf) return (nil, "ReadJPG: can't handle scan not 3 components"); image := ref Rawimage; image.r = ((0, 0), (h.X, h.Y)); image.cmap = nil; image.transp = 0; image.trindex = byte 0; image.fields = 0; image.chans = array[h.Nf] of array of byte; if(Ns == 3) image.chandesc = CRGB; else image.chandesc = CY; image.nchans = h.Nf; for(k:=0; k<h.Nf; k++) image.chans[k] = array[h.X*h.Y] of byte; # build per-component arrays Td := array[Ns] of int; Ta := array[Ns] of int; data := array[Ns] of array of array of real; H := array[Ns] of int; V := array[Ns] of int; DC := array[Ns] of int; # compute maximum H and V Hmax := 0; Vmax := 0; for(comp:=0; comp<Ns; comp++){ if(h.comp[comp].H > Hmax) Hmax = h.comp[comp].H; if(h.comp[comp].V > Vmax) Vmax = h.comp[comp].V; } # initialize data structures allHV1 := 1; for(comp=0; comp<Ns; comp++){ # JPEG requires scan components to be in same order as in frame, # so if both have 3 we know scan is Y Cb Cr and there's no need to # reorder cs := int ss[1+2*comp]; (Td[comp], Ta[comp]) = nibbles(ss[2+2*comp]); H[comp] = h.comp[comp].H; V[comp] = h.comp[comp].V; nblock := H[comp]*V[comp]; if(nblock != 1) allHV1 = 0; data[comp] = array[nblock] of array of real; DC[comp] = 0; for(m:=0; m<nblock; m++) data[comp][m] = array[8*8] of real; } ri := h.ri; h.buf[0] = byte 16rFF; # see nextbyte() h.cnt = 0; h.sr = 0; nacross := ((h.X+(8*Hmax-1))/(8*Hmax)); nmcu := ((h.Y+(8*Vmax-1))/(8*Vmax))*nacross; zz := array[64] of real; err := ""; for(mcu:=0; mcu<nmcu; ){ for(comp=0; comp<Ns; comp++){ dcht := h.dcht[Td[comp]]; acht := h.acht[Ta[comp]]; qt := h.qt[h.comp[comp].Tq]; for(block:=0; block<H[comp]*V[comp]; block++){ # F-22 t := decode(h, dcht); diff := receive(h, t); DC[comp] += diff; # F-23 zz[0:] = zeroreals; zz[0] = real (qt[0]*DC[comp]); k = 1; for(;;){ rs := decode(h, acht); (rrrr, ssss) := nibbles(byte rs); if(ssss == 0){ if(rrrr != 15) break; k += 16; }else{ k += rrrr; z := receive(h, ssss); zz[zig[k]] = real (z*qt[k]); if(k == 63) break; k++; } } idct(zz, data[comp][block]); } } # rotate colors to RGB and assign to bytes if(Ns == 1) # very easy colormap1(h, image, data[0][0], mcu, nacross); else if(allHV1) # fairly easy colormapall1(h, image, data[0][0], data[1][0], data[2][0], mcu, nacross); else # miserable general case colormap(h, image, data[0], data[1], data[2], mcu, nacross, Hmax, Vmax, H, V); # process restart marker, if present mcu++; if(ri>0 && mcu<nmcu-1 && mcu%ri==0){ restart := mcu/ri-1; rst, nskip: int; nskip = 0; do{ do{ rst = nextbyte(h, 1); nskip++; }while(rst>=0 && rst!=16rFF); if(rst == 16rFF){ rst = nextbyte(h, 1); nskip++; } }while(rst>=0 && (rst&~7)!=int RST); if(nskip != 2) err = sys->sprint("skipped %d bytes at restart %d\n", nskip-2, restart); if(rst < 0) return (nil, readerror()); if((rst&7) != (restart&7)) return (nil, sys->sprint("ReadJPG: expected RST%d got %d", restart&7, int rst&7)); h.cnt = 0; h.sr = 0; for(comp=0; comp<Ns; comp++) DC[comp] = 0; } } return (image, err); } colormap1(h: ref Header, image: ref Rawimage, data: array of real, mcu, nacross: int) { pic := image.chans[0]; minx := 8*(mcu%nacross); dx := 8; if(minx+dx > h.X) dx = h.X-minx; miny := 8*(mcu/nacross); dy := 8; if(miny+dy > h.Y) dy = h.Y-miny; pici := miny*h.X+minx; k := 0; for(y:=0; y<dy; y++){ for(x:=0; x<dx; x++){ r := clamp[int (data[k+x]+128.)+CLAMPOFF]; pic[pici+x] = r; } pici += h.X; k += 8; } } colormapall1(h: ref Header, image: ref Rawimage, data0, data1, data2: array of real, mcu, nacross: int) { rpic := image.chans[0]; gpic := image.chans[1]; bpic := image.chans[2]; minx := 8*(mcu%nacross); dx := 8; if(minx+dx > h.X) dx = h.X-minx; miny := 8*(mcu/nacross); dy := 8; if(miny+dy > h.Y) dy = h.Y-miny; pici := miny*h.X+minx; k := 0; for(y:=0; y<dy; y++){ for(x:=0; x<dx; x++){ Y := data0[k+x]+128.; Cb := data1[k+x]; Cr := data2[k+x]; r := int (Y+1.402*Cr); g := int (Y-0.34414*Cb-0.71414*Cr); b := int (Y+1.772*Cb); rpic[pici+x] = clamp[r+CLAMPOFF]; gpic[pici+x] = clamp[g+CLAMPOFF]; bpic[pici+x] = clamp[b+CLAMPOFF]; } pici += h.X; k += 8; } } colormap(h: ref Header, image: ref Rawimage, data0, data1, data2: array of array of real, mcu, nacross, Hmax, Vmax: int, H, V: array of int) { rpic := image.chans[0]; gpic := image.chans[1]; bpic := image.chans[2]; minx := 8*Hmax*(mcu%nacross); dx := 8*Hmax; if(minx+dx > h.X) dx = h.X-minx; miny := 8*Vmax*(mcu/nacross); dy := 8*Vmax; if(miny+dy > h.Y) dy = h.Y-miny; pici := miny*h.X+minx; H0 := H[0]; H1 := H[1]; H2 := H[2]; for(y:=0; y<dy; y++){ t := y*V[0]; b0 := H0*(t/(8*Vmax)); y0 := 8*((t/Vmax)&7); t = y*V[1]; b1 := H1*(t/(8*Vmax)); y1 := 8*((t/Vmax)&7); t = y*V[2]; b2 := H2*(t/(8*Vmax)); y2 := 8*((t/Vmax)&7); x0 := 0; x1 := 0; x2 := 0; for(x:=0; x<dx; x++){ Y := data0[b0][y0+x0++*H0/Hmax]+128.; Cb := data1[b1][y1+x1++*H1/Hmax]; Cr := data2[b2][y2+x2++*H2/Hmax]; if(x0*H0/Hmax >= 8){ x0 = 0; b0++; } if(x1*H1/Hmax >= 8){ x1 = 0; b1++; } if(x2*H2/Hmax >= 8){ x2 = 0; b2++; } r := int (Y+1.402*Cr); g := int (Y-0.34414*Cb-0.71414*Cr); b := int (Y+1.772*Cb); rpic[pici+x] = clamp[r+CLAMPOFF]; gpic[pici+x] = clamp[g+CLAMPOFF]; bpic[pici+x] = clamp[b+CLAMPOFF]; } pici += h.X; } } # decode next 8-bit value from entropy-coded input. chart F-26 decode(h: ref Header, t: ref Huffman): int { maxcode := t.maxcode; if(h.cnt < 8) nextbyte(h, 0); # fast lookup code := (h.sr>>(h.cnt-8))&16rFF; v := t.value[code]; if(v >= 0){ h.cnt -= t.shift[code]; return v; } h.cnt -= 8; if(h.cnt == 0) nextbyte(h, 0); h.cnt--; cnt := h.cnt; m := 1<<cnt; sr := h.sr; code <<= 1; i := 9; for(;;i++){ if(sr & m) code |= 1; if(code <= maxcode[i]) break; code <<= 1; m >>= 1; if(m == 0){ sr = nextbyte(h, 0); m = 16r80; cnt = 8; } cnt--; } h.cnt = cnt; return t.val[t.valptr[i]+(code-t.mincode[i])]; } # # load next byte of input # we should really just call h.fd.getb(), but it's faster just to use Bufio # to load big chunks and manage our own byte-at-a-time input. # nextbyte(h: ref Header, marker: int): int { b := int h.buf[h.bufi++]; if(b == 16rFF){ # check for sentinel at end of buffer if(h.bufi >= h.nbuf){ underflow := (h.bufi > h.nbuf); h.nbuf = h.fd.read(h.buf, NBUF); if(h.nbuf <= 0){ h.ch <-= (nil, readerror()); exit; } h.buf[h.nbuf] = byte 16rFF; h.bufi = 0; if(underflow) # if ran off end of buffer, just restart return nextbyte(h, marker); } if(marker) return b; b2 := h.buf[h.bufi++]; if(b2 != byte 0){ if(b2 == DNL){ h.ch <-= (nil, "ReadJPG: DNL marker unimplemented"); exit; }else if(b2<RST && RST7<b2){ h.ch <-= (nil, sys->sprint("ReadJPG: unrecognized marker %x", int b2)); exit; } # decode is reading into restart marker; satisfy it and restore state if(h.bufi < 2){ # misery: must shift up buffer h.buf[1:] = h.buf[0:h.nbuf+1]; h.nbuf++; h.buf[0] = byte 16rFF; h.bufi -= 1; }else h.bufi -= 2; b = 16rFF; } } h.cnt += 8; h.sr = (h.sr<<8)|b; return b; } # return next s bits of input, MSB first, and level shift it receive(h: ref Header, s: int): int { while(h.cnt < s) nextbyte(h, 0); v := h.sr >> (h.cnt-s); m := (1<<s); v &= m-1; h.cnt -= s; # level shift if(v < (m>>1)) v += ~(m-1)+1; return v; } # IDCT based on Arai, Agui, and Nakajima, using flow chart Figure 4.8 # of Pennebaker & Mitchell, JPEG: Still Image Data Compression Standard. # Remember IDCT is reverse of flow of DCT. a0: con 1.414; a1: con 0.707; a2: con 0.541; a3: con 0.707; a4: con 1.307; a5: con -0.383; # scaling factors from eqn 4-35 of P&M s1: con 1.0196; s2: con 1.0823; s3: con 1.2026; s4: con 1.4142; s5: con 1.8000; s6: con 2.6131; s7: con 5.1258; # overall normalization of 1/16, folded into premultiplication on vertical pass scale: con 0.0625; idct(zin: array of real, zout: array of real) { x, y: int; r := array[8*8] of real; # transform horizontally for(y=0; y<8; y++){ eighty := y<<3; # if all non-DC components are zero, just propagate the DC term if(zin[eighty+1]==0.) if(zin[eighty+2]==0. && zin[eighty+3]==0.) if(zin[eighty+4]==0. && zin[eighty+5]==0.) if(zin[eighty+6]==0. && zin[eighty+7]==0.){ v := zin[eighty]*a0; r[eighty+0] = v; r[eighty+1] = v; r[eighty+2] = v; r[eighty+3] = v; r[eighty+4] = v; r[eighty+5] = v; r[eighty+6] = v; r[eighty+7] = v; continue; } # step 5 in1 := s1*zin[eighty+1]; in3 := s3*zin[eighty+3]; in5 := s5*zin[eighty+5]; in7 := s7*zin[eighty+7]; f2 := s2*zin[eighty+2]; f3 := s6*zin[eighty+6]; f5 := (in1+in7); f7 := (in5+in3); # step 4 g2 := f2-f3; g4 := (in5-in3); g6 := (in1-in7); g7 := f5+f7; # step 3.5 t := (g4+g6)*a5; # step 3 f0 := a0*zin[eighty+0]; f1 := s4*zin[eighty+4]; f3 += f2; f2 = a1*g2; # step 2 g0 := f0+f1; g1 := f0-f1; g3 := f2+f3; g4 = t-a2*g4; g5 := a3*(f5-f7); g6 = a4*g6+t; # step 1 f0 = g0+g3; f1 = g1+f2; f2 = g1-f2; f3 = g0-g3; f5 = g5-g4; f6 := g5+g6; f7 = g6+g7; # step 6 r[eighty+0] = (f0+f7); r[eighty+1] = (f1+f6); r[eighty+2] = (f2+f5); r[eighty+3] = (f3-g4); r[eighty+4] = (f3+g4); r[eighty+5] = (f2-f5); r[eighty+6] = (f1-f6); r[eighty+7] = (f0-f7); } # transform vertically for(x=0; x<8; x++){ # step 5 in1 := scale*s1*r[x+8]; in3 := scale*s3*r[x+24]; in5 := scale*s5*r[x+40]; in7 := scale*s7*r[x+56]; f2 := scale*s2*r[x+16]; f3 := scale*s6*r[x+48]; f5 := (in1+in7); f7 := (in5+in3); # step 4 g2 := f2-f3; g4 := (in5-in3); g6 := (in1-in7); g7 := f5+f7; # step 3.5 t := (g4+g6)*a5; # step 3 f0 := scale*a0*r[x]; f1 := scale*s4*r[x+32]; f3 += f2; f2 = a1*g2; # step 2 g0 := f0+f1; g1 := f0-f1; g3 := f2+f3; g4 = t-a2*g4; g5 := a3*(f5-f7); g6 = a4*g6+t; # step 1 f0 = g0+g3; f1 = g1+f2; f2 = g1-f2; f3 = g0-g3; f5 = g5-g4; f6 := g5+g6; f7 = g6+g7; # step 6 zout[x] = (f0+f7); zout[x+8] = (f1+f6); zout[x+16] = (f2+f5); zout[x+24] = (f3-g4); zout[x+32] = (f3+g4); zout[x+40] = (f2-f5); zout[x+48] = (f1-f6); zout[x+56] = (f0-f7); } }