ref: 181c1172fa21b8699f1286f2224a35cb79dbb5a8
parent: c11957a7915d355d80ff7e6a21721ba05441ca29
author: 9ferno <[email protected]>
date: Sat Aug 21 05:35:23 EDT 2021
fixed compilation errors of 9front ip
--- /dev/null
+++ b/include/ip.h
@@ -1,0 +1,202 @@
+#pragma src "/sys/src/libip"
+#pragma lib "libip.a"
+
+/* sizes and offsets, in bytes, of the address forms used by this library */
+enum
+{
+ IPaddrlen= 16, /* length of every address array declared in this header */
+ IPv4addrlen= 4,
+ IPv4off= 12, /* v4tov6() stores the four v4 bytes at this offset */
+ IPllen= 4,
+ IPV4HDR_LEN= 20,
+
+ /* vihl & vcf[0] values */
+ IP_VER4= 0x40,
+ IP_VER6= 0x60,
+};
+
+/*
+ * for reading /net/ipifc
+ */
+typedef struct Ipifc Ipifc;
+typedef struct Iplifc Iplifc;
+typedef struct Ipv6rp Ipv6rp;
+
+/* local address */
+struct Iplifc
+{
+ Iplifc *next; /* next address of the same interface; nil ends the list */
+
+ /* per address on the ip interface */
+ uchar ip[IPaddrlen];
+ uchar mask[IPaddrlen];
+ uchar net[IPaddrlen]; /* ip & mask */
+ ulong preflt; /* preferred lifetime */
+ ulong validlt; /* valid lifetime */
+};
+
+/* default values, one per stack */
+/*
+ * readipifc() fills each member from the status-file field
+ * of the same name, converted with atoi().
+ */
+struct Ipv6rp
+{
+ int mflag;
+ int oflag;
+ int maxraint;
+ int minraint;
+ int linkmtu;
+ int reachtime;
+ int rxmitra;
+ int ttl;
+ int routerlt;
+};
+
+/* actual interface */
+/* one node of the list returned by readipifc() */
+struct Ipifc
+{
+ Ipifc *next; /* next interface; nil ends the list */
+ Iplifc *lifc; /* list of addresses configured on this interface */
+
+ /* per ip interface */
+ int index; /* number of interface in ipifc dir */
+ char dev[64]; /* device name, always NUL terminated by readipifc() */
+ uchar sendra6; /* on == send router adv */
+ uchar recvra6; /* on == rcv router adv */
+ int mtu;
+ ulong pktin;
+ ulong pktout;
+ ulong errin;
+ ulong errout;
+ Ipv6rp rp;
+};
+
+/*
+ * Address class tests; addr must be indexable as bytes (e.g. uchar*).
+ * ISIPV6MCAST: first byte is 0xff.
+ * ISIPV6LINKLOCAL: first byte is 0xfe and the top two bits of the second are 10.
+ */
+#define ISIPV6MCAST(addr) ((addr)[0] == 0xff)
+#define ISIPV6LINKLOCAL(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
+
+/*
+ * ipv6 constants
+ * `ra' is `router advertisement', `rs' is `router solicitation'.
+ * `na' is `neighbour advertisement'.
+ */
+enum {
+ IPV6HDR_LEN = 40, /* size of the fixed fields of Ip6hdr (4+2+1+1+16+16) */
+
+ /* neighbour discovery option types */
+ V6nd_srclladdr = 1,
+ V6nd_targlladdr = 2,
+ V6nd_pfxinfo = 3,
+ V6nd_redirhdr = 4,
+ V6nd_mtu = 5,
+ /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+ V6nd_home = 8,
+ V6nd_srcaddrs = 9, /* rfc3122 */
+ V6nd_ip = 17,
+ V6nd_rdns = 25, /* rfc6106 */
+ V6nd_rdnssl = 31,
+ /* plan 9 extensions */
+ V6nd_9fs = 250,
+ V6nd_9auth = 251,
+
+ /* Router constants (all times in ms.) */
+ Maxv6initraintvl= 16000,
+ Maxv6initras = 3,
+ Maxv6finalras = 3,
+ Minv6interradelay= 3000,
+ Maxv6radelay = 500,
+
+ /* Host constants */
+ Maxv6rsdelay = 1000,
+ V6rsintvl = 4000,
+ Maxv6rss = 3,
+
+ /* Node constants */
+ Maxv6mcastrss = 3,
+ Maxv6unicastrss = 3,
+ Maxv6anycastdelay= 1000,
+ Maxv6na = 3,
+ V6reachabletime = 30000,
+ V6retranstimer = 1000,
+ V6initprobedelay= 5000,
+};
+
+/* V6 header on the wire */
+typedef struct Ip6hdr Ip6hdr;
+/* multi-byte fields are byte arrays, so the layout has no padding or byte-order dependence */
+struct Ip6hdr {
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */
+ uchar ploadlen[2]; /* payload length: packet length - 40 */
+ uchar proto; /* next header type */
+ uchar ttl; /* hop limit */
+ uchar src[IPaddrlen]; /* source address */
+ uchar dst[IPaddrlen]; /* destination address */
+ uchar payload[]; /* whatever follows the 40 fixed bytes */
+};
+
+/*
+ * user-level icmpv6 with control message "headers"
+ */
+typedef struct Icmp6hdr Icmp6hdr;
+struct Icmp6hdr {
+ uchar _0_[8]; /* NOTE(review): unnamed 8 bytes, presumably reserved -- confirm */
+ uchar laddr[IPaddrlen]; /* local address */
+ uchar raddr[IPaddrlen]; /* remote address */
+};
+
+/*
+ * user level udp headers with control message "headers"
+ */
+enum
+{
+ Udphdrsize= 52, /* size of a Udphdr */
+};
+
+typedef struct Udphdr Udphdr;
+/* three 16-byte addresses plus two 2-byte ports: 3*16 + 2 + 2 = Udphdrsize */
+struct Udphdr
+{
+ uchar raddr[IPaddrlen]; /* V6 remote address */
+ uchar laddr[IPaddrlen]; /* V6 local address */
+ uchar ifcaddr[IPaddrlen]; /* V6 ifc addr msg was received on */
+ uchar rport[2]; /* remote port */
+ uchar lport[2]; /* local port */
+};
+
+uchar* defmask(uchar*);
+void maskip(uchar*, uchar*, uchar*);
+int eipfmt(Fmt*);
+int isv4(uchar*);
+vlong parseip(uchar*, char*);
+vlong parseipmask(uchar*, char*, int);
+vlong parseipandmask(uchar*, uchar*, char*, char*);
+char* v4parseip(uchar*, char*);
+int parseether(uchar*, char*);
+int myipaddr(uchar*, char*);
+int myetheraddr(uchar*, char*);
+int equivip4(uchar*, uchar*);
+int equivip6(uchar*, uchar*);
+
+Ipifc* readipifc(char*, Ipifc*, int);
+
+void hnputv(void*, uvlong);
+void hnputl(void*, uint);
+void hnputs(void*, ushort);
+uvlong nhgetv(void*);
+uint nhgetl(void*);
+ushort nhgets(void*);
+ushort ptclbsum(uchar*, int);
+
+int v6tov4(uchar*, uchar*);
+void v4tov6(uchar*, uchar*);
+
+/* compare / copy whole IPaddrlen-byte addresses; ipmove's destination is x */
+#define ipcmp(x, y) memcmp(x, y, IPaddrlen)
+#define ipmove(x, y) memmove(x, y, IPaddrlen)
+
+/* well known addresses; the definitions visible in this change are in libip/ipaux.c */
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen]; /* NOTE(review): no definition visible in libip/ipaux.c -- confirm one exists */
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+/* top two bits (0..3) of the first byte at p; v4parseip() uses it to pick the address class */
+#define CLASS(p) ((*(uchar*)(p))>>6)
+
+#pragma varargck type "I" uchar*
+#pragma varargck type "V" uchar*
+#pragma varargck type "E" uchar*
+#pragma varargck type "M" uchar*
--- a/include/kern.h
+++ b/include/kern.h
@@ -605,3 +605,31 @@
(*_argt? _argt: argv[1]? (argc--, *++argv): ((x), abort(), (char*)0)))
#define ARGC() _argc
+
+/* below from ctype.h */
+/* classification bits stored per character in _ctype[] */
+#define _U 01 /* tested by isupper */
+#define _L 02 /* tested by islower */
+#define _N 04 /* tested by isdigit */
+#define _S 010 /* tested by isspace */
+#define _P 020 /* tested by ispunct */
+#define _C 040 /* tested by iscntrl */
+#define _B 0100 /* counted by isprint but not by isgraph */
+#define _X 0200 /* tested by isxdigit */
+
+extern unsigned char _ctype[];
+
+/*
+ * The is*() macros cast c to unsigned char before indexing, so any
+ * char value is a valid index.  They yield zero or a non-zero bit mask,
+ * not necessarily 1.
+ */
+#define isalpha(c) (_ctype[(unsigned char)(c)]&(_U|_L))
+#define isupper(c) (_ctype[(unsigned char)(c)]&_U)
+#define islower(c) (_ctype[(unsigned char)(c)]&_L)
+#define isdigit(c) (_ctype[(unsigned char)(c)]&_N)
+#define isxdigit(c) (_ctype[(unsigned char)(c)]&_X)
+#define isspace(c) (_ctype[(unsigned char)(c)]&_S)
+#define ispunct(c) (_ctype[(unsigned char)(c)]&_P)
+#define isalnum(c) (_ctype[(unsigned char)(c)]&(_U|_L|_N))
+#define isprint(c) (_ctype[(unsigned char)(c)]&(_P|_U|_L|_N|_B))
+#define isgraph(c) (_ctype[(unsigned char)(c)]&(_P|_U|_L|_N))
+#define iscntrl(c) (_ctype[(unsigned char)(c)]&_C)
+#define isascii(c) ((unsigned char)(c)<=0177)
+/* _toupper/_tolower do no range check: the caller must pass a lower/upper case letter */
+#define _toupper(c) ((c)-'a'+'A')
+#define _tolower(c) ((c)-'A'+'a')
+#define toascii(c) ((c)&0177)
--- /dev/null
+++ b/lib9/ctype.c
@@ -1,0 +1,24 @@
+#include "u.h"
+#include "kern.h"
+
+/*
+ * Character class table indexed by byte value (rows are labelled in octal).
+ * Only the first 128 entries are listed; the other 128 are implicitly
+ * zero, so every is*() macro in kern.h is false for bytes >= 0200.
+ */
+uchar _ctype[256] =
+{
+/* 0 1 2 3 4 5 6 7 */
+
+/* 0*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 10*/ _C, _S|_C, _S|_C, _S|_C, _S|_C, _S|_C, _C, _C,
+/* 20*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 30*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 40*/ _S|_B, _P, _P, _P, _P, _P, _P, _P,
+/* 50*/ _P, _P, _P, _P, _P, _P, _P, _P,
+/* 60*/ _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X,
+/* 70*/ _N|_X, _N|_X, _P, _P, _P, _P, _P, _P,
+/*100*/ _P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U,
+/*110*/ _U, _U, _U, _U, _U, _U, _U, _U,
+/*120*/ _U, _U, _U, _U, _U, _U, _U, _U,
+/*130*/ _U, _U, _U, _P, _P, _P, _P, _P,
+/*140*/ _P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L,
+/*150*/ _L, _L, _L, _L, _L, _L, _L, _L,
+/*160*/ _L, _L, _L, _L, _L, _L, _L, _L,
+/*170*/ _L, _L, _L, _P, _P, _P, _P, _C,
+};
--- a/lib9/mkfile
+++ b/lib9/mkfile
@@ -6,6 +6,7 @@
# files used by all models
#
COMMONFILES=\
+ ctype.$O\
convD2M.$O\
convM2D.$O\
convM2S.$O\
--- /dev/null
+++ b/libip/bo.c
@@ -1,0 +1,77 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * Store the 64-bit value v at p as eight bytes,
+ * most significant byte first.
+ */
+void
+hnputv(void *p, u64 v)
+{
+	uchar *out;
+	int k;
+
+	out = p;
+	for(k = 0; k < 8; k++)
+		out[k] = v >> (56 - 8*k);
+}
+
+/*
+ * Store the 32-bit value v at p as four bytes,
+ * most significant byte first.
+ */
+void
+hnputl(void *p, u32 v)
+{
+	uchar *out;
+	int k;
+
+	out = p;
+	for(k = 0; k < 4; k++)
+		out[k] = v >> (24 - 8*k);
+}
+
+/*
+ * Store the 16-bit value v at p as two bytes,
+ * most significant byte first.
+ */
+void
+hnputs(void *p, u16 v)
+{
+	uchar *out;
+
+	out = p;
+	out[1] = v;
+	out[0] = v>>8;
+}
+
+/*
+ * Read eight bytes at p, most significant first, and return them
+ * as a 64-bit value (the inverse of hnputv).
+ *
+ * The value is accumulated in a u64 so that no byte is ever shifted
+ * as an int: the expression a[4]<<24 is evaluated in int, becomes
+ * negative when a[4] >= 0x80 and would then sign-extend, setting
+ * the upper 32 bits of the result.
+ */
+u64
+nhgetv(void *p)
+{
+	uchar *a;
+	u64 v;
+	int k;
+
+	a = p;
+	v = 0;
+	for(k = 0; k < 8; k++)
+		v = (v<<8) | a[k];
+	return v;
+}
+
+/*
+ * Read four bytes at p, most significant first, and return them
+ * as a 32-bit value (the inverse of hnputl).
+ *
+ * Each byte is widened to u32 before shifting, so a first byte
+ * >= 0x80 is never shifted into the sign bit of an int.
+ */
+u32
+nhgetl(void *p)
+{
+	uchar *a;
+	u32 v;
+
+	a = p;
+	v = (u32)a[0]<<24;
+	v |= (u32)a[1]<<16;
+	v |= (u32)a[2]<<8;
+	v |= a[3];
+	return v;
+}
+
+/*
+ * Read two bytes at p, most significant first, and return them
+ * as a 16-bit value (the inverse of hnputs).
+ */
+u16
+nhgets(void *p)
+{
+	uchar *in;
+	u16 hi, lo;
+
+	in = p;
+	hi = in[0];
+	lo = in[1];
+	return hi<<8 | lo;
+}
--- /dev/null
+++ b/libip/classmask.c
@@ -1,0 +1,86 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * Default masks for v4 addresses, indexed by the top two bits of the
+ * first v4 byte (see defmask).  Rows 0 and 1 keep one v4 byte,
+ * row 2 keeps two, row 3 keeps three; the 12 prefix bytes are always kept.
+ */
+static uchar classmask[4][16] = {
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
+};
+
+/* ::1 */
+static uchar v6loopback[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+
+/*
+ * For each v6 family below: a prefix, the mask defmask() returns for
+ * it, and the number of leading bytes defmask() compares.
+ */
+static uchar v6linklocal[IPaddrlen] = {
+ 0xfe, 0x80, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+static uchar v6linklocalmask[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+static int v6llpreflen = 8; /* link-local prefix length in bytes */
+
+static uchar v6multicast[IPaddrlen] = {
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+static uchar v6multicastmask[IPaddrlen] = {
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+static int v6mcpreflen = 1; /* multicast prefix length */
+
+static uchar v6solicitednode[IPaddrlen] = {
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01,
+ 0xff, 0, 0, 0
+};
+static uchar v6solicitednodemask[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0, 0x0, 0x0
+};
+static int v6snpreflen = 13; /* solicited-node prefix length in bytes */
+
+/*
+ * Return the default mask for address ip.  The result points at a
+ * static table (or IPallbits) and must not be modified by the caller.
+ * v4 addresses get a class based mask; v6 addresses are matched, in
+ * order, against loopback, link-local, solicited-node and multicast,
+ * and anything else gets the all-ones mask.
+ */
+uchar*
+defmask(uchar *ip)
+{
+	if(isv4(ip))
+		return classmask[ip[IPv4off]>>6];
+
+	if(ipcmp(ip, v6loopback) == 0)
+		return IPallbits;
+	if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
+		return v6linklocalmask;
+	/* solicited-node must be tried before the shorter multicast prefix */
+	if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
+		return v6solicitednodemask;
+	if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
+		return v6multicastmask;
+	return IPallbits;
+}
+
+/*
+ * to = from & mask, byte by byte over IPaddrlen bytes.
+ */
+void
+maskip(uchar *from, uchar *mask, uchar *to)
+{
+	int k;
+
+	k = 0;
+	while(k < IPaddrlen){
+		to[k] = mask[k] & from[k];
+		k++;
+	}
+}
--- /dev/null
+++ b/libip/eipfmt.c
@@ -1,0 +1,109 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+enum
+{
+ Isprefix= 16, /* flag bit in prefixvals[]: the byte is a run of 1 bits followed by 0 bits */
+};
+
+/*
+ * For a mask byte that is a valid prefix byte, the number of leading
+ * 1 bits (0..8) or'ed with Isprefix.  All other entries are zero,
+ * which eipfmt's 'M' case treats as "not a prefix mask".
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+/*
+ * Print-verb handler; the verb is taken from f->r.
+ *	E	uchar*: 6 bytes printed as 12 hex digits
+ *	I	uchar*: 16-byte address; dotted quad if it starts with
+ *		the first 12 bytes of v4prefix, else colon-separated hex
+ *	i	ulong*: 4 values converted with hnputl, then printed as I
+ *	V	uchar*: 4 bytes printed as a dotted quad
+ *	M	uchar*: 16-byte mask; "/n" if it is a contiguous prefix,
+ *		else printed as I
+ * Any other verb prints "(eipfmt)".  Returns the result of fmtstrcpy.
+ */
+int
+eipfmt(Fmt *f)
+{
+ char buf[5*8]; /* 40 bytes: eight 4-digit groups, seven colons and a NUL */
+ static char *efmt = "%.2ux%.2ux%.2ux%.2ux%.2ux%.2ux";
+ static char *ifmt = "%d.%d.%d.%d";
+ uchar *p, ip[16];
+ ulong *lp;
+ ushort s;
+ int i, j, n, eln, eli;
+
+ switch(f->r) {
+ case 'E': /* Ethernet address */
+ p = va_arg(f->args, uchar*);
+ snprint(buf, sizeof buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+ return fmtstrcpy(f, buf);
+
+ case 'I': /* Ip address */
+ p = va_arg(f->args, uchar*);
+common:
+ /* the 'i' and 'M' cases jump here with p already set */
+ if(memcmp(p, v4prefix, 12) == 0){
+ snprint(buf, sizeof buf, ifmt, p[12], p[13], p[14], p[15]);
+ return fmtstrcpy(f, buf);
+ }
+
+ /* find longest elision */
+ /* eli = byte offset of the longest run of zero 16-bit groups, eln = its length in bytes */
+ eln = eli = -1;
+ for(i = 0; i < 16; i += 2){
+ for(j = i; j < 16; j += 2)
+ if(p[j] != 0 || p[j+1] != 0)
+ break;
+ if(j > i && j - i > eln){
+ eli = i;
+ eln = j - i;
+ }
+ }
+
+ /* print with possible elision */
+ n = 0;
+ for(i = 0; i < 16; i += 2){
+ if(i == eli){
+ n += sprint(buf+n, "::");
+ i += eln;
+ if(i >= 16)
+ break;
+ } else if(i != 0)
+ n += sprint(buf+n, ":");
+ s = (p[i]<<8) + p[i+1];
+ n += sprint(buf+n, "%ux", s);
+ }
+ return fmtstrcpy(f, buf);
+
+ case 'i': /* v6 address as 4 longs */
+ lp = va_arg(f->args, ulong*);
+ for(i = 0; i < 4; i++)
+ hnputl(ip+4*i, *lp++);
+ p = ip;
+ goto common;
+
+ case 'V': /* v4 ip address */
+ p = va_arg(f->args, uchar*);
+ snprint(buf, sizeof buf, ifmt, p[0], p[1], p[2], p[3]);
+ return fmtstrcpy(f, buf);
+
+ case 'M': /* ip mask */
+ p = va_arg(f->args, uchar*);
+
+ /* look for a prefix mask */
+ for(i = 0; i < 16; i++)
+ if(p[i] != 0xff)
+ break;
+ if(i < 16){
+ /* p[i] is the first byte that is not all ones */
+ if((prefixvals[p[i]] & Isprefix) == 0)
+ goto common;
+ /* every byte after it must be zero */
+ for(j = i+1; j < 16; j++)
+ if(p[j] != 0)
+ goto common;
+ n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+ } else
+ n = 8*16;
+
+ /* got one, use /xx format */
+ snprint(buf, sizeof buf, "/%d", n);
+ return fmtstrcpy(f, buf);
+ }
+ return fmtstrcpy(f, "(eipfmt)");
+}
--- /dev/null
+++ b/libip/equivip.c
@@ -1,0 +1,25 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * Return 1 if the 4-byte addresses a and b are equal, else 0.
+ */
+int
+equivip4(uchar *a, uchar *b)
+{
+	if(memcmp(a, b, 4) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * Return 1 if the IPaddrlen-byte addresses a and b are equal, else 0.
+ */
+int
+equivip6(uchar *a, uchar *b)
+{
+	if(memcmp(a, b, IPaddrlen) != 0)
+		return 0;
+	return 1;
+}
--- /dev/null
+++ b/libip/ipaux.c
@@ -1,0 +1,102 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * well known IP addresses
+ */
+/* 255.255.255.255 behind the v4 prefix */
+uchar IPv4bcast[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff
+};
+/* 224.0.0.1 behind the v4 prefix */
+uchar IPv4allsys[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xe0, 0, 0, 0x01
+};
+/* 224.0.0.2 behind the v4 prefix */
+uchar IPv4allrouter[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xe0, 0, 0, 0x02
+};
+/* all ones; also returned by defmask() as the full-length mask */
+uchar IPallbits[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff
+};
+/* all zeros (no initializer): the unspecified address */
+uchar IPnoaddr[IPaddrlen];
+
+/*
+ * prefix of all v4 addresses
+ */
+/* only the first IPv4off (12) bytes are compared by isv4() */
+uchar v4prefix[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0, 0, 0, 0
+};
+
+/*
+ * Return 1 if the first IPv4off bytes of ip match v4prefix,
+ * i.e. ip holds a v4 address in 16-byte form; else 0.
+ */
+int
+isv4(uchar *ip)
+{
+	if(memcmp(v4prefix, ip, IPv4off) != 0)
+		return 0;
+	return 1;
+}
+
+/*
+ * the following routines are unrolled with no memset's to speed
+ * up the usual case
+ */
+/*
+ * Expand the 4-byte address v4 into the 16-byte form at v6:
+ * ten zero bytes, 0xff 0xff, then the four v4 bytes.
+ * Still written without loops or memset; the prefix is stored
+ * before the v4 bytes are read, as before.
+ */
+void
+v4tov6(uchar *v6, uchar *v4)
+{
+	v6[0] = 0; v6[1] = 0; v6[2] = 0; v6[3] = 0; v6[4] = 0;
+	v6[5] = 0; v6[6] = 0; v6[7] = 0; v6[8] = 0; v6[9] = 0;
+	v6[10] = 0xff; v6[11] = 0xff;
+
+	v6[12] = v4[0];
+	v6[13] = v4[1];
+	v6[14] = v4[2];
+	v6[15] = v4[3];
+}
+
+/*
+ * Extract the 4-byte address from the 16-byte form v6 into v4.
+ * Returns 0 if v6 carries the v4 prefix.  Otherwise v4 is zeroed
+ * and the result is 0 when v6 equals IPnoaddr, -1 for anything else.
+ */
+int
+v6tov4(uchar *v4, uchar *v6)
+{
+	if(v6[0] != 0 || v6[1] != 0 || v6[2] != 0 || v6[3] != 0
+	|| v6[4] != 0 || v6[5] != 0 || v6[6] != 0 || v6[7] != 0
+	|| v6[8] != 0 || v6[9] != 0
+	|| v6[10] != 0xff || v6[11] != 0xff){
+		/* not a v4 address */
+		memset(v4, 0, 4);
+		if(memcmp(v6, IPnoaddr, IPaddrlen) != 0)
+			return -1;
+		return 0;
+	}
+
+	v4[0] = v6[12];
+	v4[1] = v6[13];
+	v4[2] = v6[14];
+	v4[3] = v6[15];
+	return 0;
+}
--- /dev/null
+++ b/libip/mkfile
@@ -1,0 +1,20 @@
+<../mkconfig
+
+LIB=libip.a
+OFILES=\
+ eipfmt.$O\
+ equivip.$O\
+ parseip.$O\
+ parseether.$O\
+ myetheraddr.$O\
+ myipaddr.$O\
+ classmask.$O\
+ bo.$O\
+ readipifc.$O\
+ ipaux.$O\
+ ptclbsum.$O\
+
+HFILES=\
+ $ROOT/include/ip.h\
+
+<$ROOT/mkfiles/mksyslib-$SHELLTYPE
--- /dev/null
+++ b/libip/myetheraddr.c
@@ -1,0 +1,28 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * Read the ethernet address of dev into to (6 bytes).
+ * dev is used as given if it starts with '/' or '#', otherwise it
+ * is looked up under /net; the address is read from the "addr" file.
+ * Returns 0 on success, -1 if the file cannot be opened or read or
+ * if its contents do not parse as an ethernet address.
+ */
+int
+myetheraddr(uchar *to, char *dev)
+{
+	int n, fd;
+	char buf[256];
+
+	if(*dev == '/' || *dev == '#')
+		snprint(buf, sizeof buf, "%s/addr", dev);
+	else
+		snprint(buf, sizeof buf, "/net/%s/addr", dev);
+
+	fd = open(buf, OREAD);
+	if(fd < 0)
+		return -1;
+
+	/* buf is reused for the file contents; leave room for the NUL */
+	n = read(fd, buf, sizeof buf -1 );
+	close(fd);
+	if(n <= 0)
+		return -1;
+	buf[n] = 0;
+
+	/* report a malformed address instead of claiming success */
+	return parseether(to, buf);
+}
--- /dev/null
+++ b/libip/myipaddr.c
@@ -1,0 +1,69 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/* 127.0.0.0 behind the v4 prefix; compared against (address & loopbackmask) */
+static uchar loopbacknet[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 127, 0, 0, 0
+};
+/* keeps the 12 prefix bytes and the first v4 byte */
+static uchar loopbackmask[IPaddrlen] = {
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0, 0, 0
+};
+/* ::1 */
+static uchar loopback6[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 1
+};
+
+// find first ip that isn't a friggin loopback or
+// link-local address. prefer v4 over v6.
+/*
+ * Store in ip the first usable address found under net.
+ * The first non-loopback v4 address wins immediately; failing that,
+ * the first v6 address that is neither ::1 nor link-local is used.
+ * Returns 0 if an address was stored, -1 if ip was left as IPnoaddr.
+ * The interface list is kept in a static and handed back to
+ * readipifc() on the next call.
+ */
+int
+myipaddr(uchar *ip, char *net)
+{
+	Ipifc *nifc;
+	Iplifc *lifc;
+	static Ipifc *ifc;
+	uchar masked[IPaddrlen];
+	uchar *a;
+
+	ipmove(ip, IPnoaddr);
+	ifc = readipifc(net, ifc, -1);
+	for(nifc = ifc; nifc != nil; nifc = nifc->next){
+		for(lifc = nifc->lifc; lifc != nil; lifc = lifc->next){
+			a = lifc->ip;
+
+			/* unspecified */
+			if(ipcmp(a, IPnoaddr) == 0)
+				continue;
+
+			if(isv4(a)){
+				/* take it unless it is on the v4 loopback net */
+				maskip(a, loopbackmask, masked);
+				if(ipcmp(masked, loopbacknet) != 0){
+					ipmove(ip, a);
+					return 0;
+				}
+				continue;
+			}
+
+			/* remember only the first acceptable v6 address */
+			if(ipcmp(ip, IPnoaddr) == 0
+			&& ipcmp(a, loopback6) != 0
+			&& !ISIPV6LINKLOCAL(a))
+				ipmove(ip, a);
+		}
+	}
+	if(ipcmp(ip, IPnoaddr) == 0)
+		return -1;
+	return 0;
+}
--- /dev/null
+++ b/libip/parseether.c
@@ -1,0 +1,25 @@
+#include "u.h"
+#include "kern.h"
+
+/*
+ * Convert the text at from into the 6 bytes at to.
+ * Each byte is two characters converted with strtoul base 16; a ':'
+ * after a pair is skipped.  Returns -1 if the string ends before six
+ * pairs were read (bytes converted so far stay in to), else 0.
+ */
+int
+parseether(uchar *to, char *from)
+{
+	char pair[4];
+	char *s;
+	int k;
+
+	s = from;
+	k = 0;
+	while(k < 6){
+		if(s[0] == 0 || s[1] == 0)
+			return -1;
+		pair[0] = s[0];
+		pair[1] = s[1];
+		pair[2] = 0;
+		s += 2;
+		to[k] = strtoul(pair, 0, 16);
+		if(*s == ':')
+			s++;
+		k++;
+	}
+	return 0;
+}
--- /dev/null
+++ b/libip/parseip.c
@@ -1,0 +1,202 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * TODO: this table duplicates _ctype in lib9/ctype.c.
+ * It is repeated here because building without it failed with
+ *	_strayintrx: _ctype: not defined
+ * Find out why the lib9 copy is not picked up, then delete this one.
+ * NOTE(review): two definitions of the same global may clash at link
+ * time once the lib9 copy is visible -- confirm.
+ */
+uchar _ctype[256] =
+{
+/* 0 1 2 3 4 5 6 7 */
+
+/* 0*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 10*/ _C, _S|_C, _S|_C, _S|_C, _S|_C, _S|_C, _C, _C,
+/* 20*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 30*/ _C, _C, _C, _C, _C, _C, _C, _C,
+/* 40*/ _S|_B, _P, _P, _P, _P, _P, _P, _P,
+/* 50*/ _P, _P, _P, _P, _P, _P, _P, _P,
+/* 60*/ _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X, _N|_X,
+/* 70*/ _N|_X, _N|_X, _P, _P, _P, _P, _P, _P,
+/*100*/ _P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U,
+/*110*/ _U, _U, _U, _U, _U, _U, _U, _U,
+/*120*/ _U, _U, _U, _U, _U, _U, _U, _U,
+/*130*/ _U, _U, _U, _P, _P, _P, _P, _P,
+/*140*/ _P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L,
+/*150*/ _L, _L, _L, _L, _L, _L, _L, _L,
+/*160*/ _L, _L, _L, _L, _L, _L, _L, _L,
+/*170*/ _L, _L, _L, _P, _P, _P, _P, _C,
+};
+
+/*
+ * Parse up to four '.'-separated numbers (strtoul base 0) from
+ * `from' into to[0..3] and return a pointer just past what was
+ * consumed.  When fewer than four numbers are given, the last one
+ * is moved to to[3] according to the class of to[0]:
+ *	class A (CLASS 0 or 1):	a.b.c -> a.0.b.c,  a.b -> a.?.?.b with to[1] = 0
+ *	class B (CLASS 2):	a.b.c -> a.b.0.c
+ * Bytes that are not assigned keep whatever the caller left in them.
+ */
+char*
+v4parseip(uchar *to, char *from)
+{
+	int cnt, cls;
+	char *s;
+
+	s = from;
+	cnt = 0;
+	while(cnt < 4 && *s){
+		to[cnt] = strtoul(s, &s, 0);
+		if(*s == '.')
+			s++;
+		cnt++;
+	}
+
+	cls = CLASS(to);
+	if(cls == 0 || cls == 1){
+		/* class A - 1 uchar net */
+		if(cnt == 3){
+			to[3] = to[2];
+			to[2] = to[1];
+			to[1] = 0;
+		} else if(cnt == 2){
+			to[3] = to[1];
+			to[1] = 0;
+		}
+	} else if(cls == 2){
+		/* class B - 2 uchar net */
+		if(cnt == 3){
+			to[3] = to[2];
+			to[2] = 0;
+		}
+	}
+	return s;
+}
+
+/*
+ * Return 1 if c may appear inside an address: '.', ':' or an
+ * ascii hex digit; else 0.
+ */
+static int
+ipcharok(int c)
+{
+	if(c == '.' || c == ':')
+		return 1;
+	return isascii(c) && isxdigit(c);
+}
+
+/*
+ * Return 1 if c can end an address: NUL or any character that is
+ * not '.', ':' or an ascii letter or digit; else 0.
+ */
+static int
+delimchar(int c)
+{
+	if(c == '\0')
+		return 1;
+	if(c == '.' || c == ':')
+		return 0;
+	return !(isascii(c) && isalnum(c));
+}
+
+/*
+ * `from' may contain an address followed by other characters,
+ * at least in /boot, so we permit whitespace (and more) after the address.
+ * we do ensure that "delete" cannot be parsed as "de::".
+ *
+ * some callers don't check the return value for errors, so
+ * set `to' to something distinctive in the case of a parse error.
+ */
+/*
+ * Returns -1 on a parse error (with `to' zeroed), 6 for an address
+ * that contained a ':', otherwise the last four bytes of the result
+ * as a number.
+ */
+vlong
+parseip(uchar *to, char *from)
+{
+ int i, elipsis = 0, v4 = 1;
+ ulong x;
+ char *p, *op;
+
+ memset(to, 0, IPaddrlen);
+ p = from;
+ /* i is the byte offset in `to' of the 16-bit group being parsed */
+ for(i = 0; i < IPaddrlen && ipcharok(*p); i+=2){
+ op = p;
+ x = strtoul(p, &p, 16);
+ if(*p == '.' || (*p == 0 && i == 0)){ /* ends with v4? */
+ if(i > IPaddrlen-4){
+ memset(to, 0, IPaddrlen);
+ return -1; /* parse error */
+ }
+ /* reparse this token from its start as a dotted v4 address */
+ p = v4parseip(to+i, op);
+ i += 4;
+ break;
+ }
+ /* v6: at most 4 hex digits, followed by colon or delim */
+ if(x != (ushort)x || *p != ':' && !delimchar(*p)) {
+ memset(to, 0, IPaddrlen);
+ return -1; /* parse error */
+ }
+ to[i] = x>>8;
+ to[i+1] = x;
+ if(*p == ':'){
+ v4 = 0;
+ if(*++p == ':'){ /* :: is elided zero short(s) */
+ if (elipsis) {
+ memset(to, 0, IPaddrlen);
+ return -1; /* second :: */
+ }
+ /* remember where the elided zeros have to be inserted */
+ elipsis = i+2;
+ p++;
+ }
+ } else if (p == op) /* strtoul made no progress? */
+ break;
+ }
+ if (p == from || !delimchar(*p)) {
+ memset(to, 0, IPaddrlen);
+ return -1; /* parse error */
+ }
+ if(i < IPaddrlen){
+ /*
+  * fewer than 16 bytes parsed: move everything from `elipsis'
+  * on to the end and zero the gap.  elipsis is 0 when there
+  * was no "::", so the whole parsed part is right-justified.
+  */
+ memmove(&to[elipsis+IPaddrlen-i], &to[elipsis], i-elipsis);
+ memset(&to[elipsis], 0, IPaddrlen-i);
+ }
+ if(v4){
+ to[10] = to[11] = 0xff;
+ return (ulong)nhgetl(to + IPv4off);
+ } else
+ return 6;
+}
+
+/*
+ * hack to allow ip v4 masks to be entered in the old
+ * style
+ */
+/*
+ * Parse the mask `from' into `to'.  `from' is either "/n" (a number
+ * of prefix bits) or a mask written like an address.  v4 non-zero
+ * means the mask belongs to a v4 address.
+ * Returns -1 if the mask cannot be used with a v4 address, 6 for a
+ * "/n" mask of fewer than 96 bits when v4 is zero, otherwise the last
+ * four bytes of the mask as a number (or whatever parseip returned
+ * for the address form).
+ */
+vlong
+parseipmask(uchar *to, char *from, int v4)
+{
+ vlong x;
+ int i, w;
+ uchar *p;
+
+ if(*from == '/'){
+ /* as a number of prefix bits */
+ i = atoi(from+1);
+ if(i < 0)
+ i = 0;
+ /* a v4 prefix length counts from the end of the 96-bit v4 prefix */
+ if(i <= 32 && v4)
+ i += 96;
+ if(i > 128)
+ i = 128;
+ w = i;
+ memset(to, 0, IPaddrlen);
+ /* whole 0xff bytes first, then the leading bits of a partial byte */
+ for(p = to; i >= 8; i -= 8)
+ *p++ = 0xff;
+ if(i > 0)
+ *p = ~((1<<(8-i))-1);
+ /*
+ * identify as ipv6 if the mask is inexpressible as a v4 mask
+ * (because it has too few mask bits). Arguably, we could
+ * always return 6 here.
+ */
+ if (w < 96)
+ return v4 ? -1 : 6;
+ x = (ulong)nhgetl(to+IPv4off);
+ } else {
+ /* as a straight v4 bit mask */
+ x = parseip(to, from);
+ /* a mask parsed as a v4 address gets an all-ones 12-byte prefix */
+ if(memcmp(to, v4prefix, IPv4off) == 0)
+ memset(to, 0xff, IPv4off);
+ else if(v4 && memcmp(to, IPallbits, IPv4off) != 0)
+ x = -1;
+ }
+ return x;
+}
+
+/*
+ * Parse ipstr into ip and maskstr into mask.  A nil maskstr yields
+ * an all-ones mask.  Returns what parseip returned for the address,
+ * or -1 if parseipmask rejected the mask.
+ */
+vlong
+parseipandmask(uchar *ip, uchar *mask, char *ipstr, char *maskstr)
+{
+	vlong rv;
+	int v4addr;
+
+	rv = parseip(ip, ipstr);
+	if(maskstr == nil){
+		memset(mask, 0xff, IPaddrlen);
+		return rv;
+	}
+
+	/* the mask is interpreted according to the family of the parsed address */
+	v4addr = memcmp(ip, v4prefix, IPv4off) == 0;
+	if(parseipmask(mask, maskstr, v4addr) == -1)
+		return -1;
+	return rv;
+}
--- /dev/null
+++ b/libip/ptclbsum.c
@@ -1,0 +1,68 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/* LITTLE is non-zero when the first byte in memory of the short 1 is 1 */
+static short endian = 1;
+static uchar* aendian = (uchar*)&endian;
+#define LITTLE *aendian
+
+/*
+ * Sum the len bytes at addr as 16-bit quantities and fold the
+ * carries back in until the result fits in 16 bits.  The result is
+ * returned as is; it is not complemented here.
+ */
+ushort
+ptclbsum(uchar *addr, int len)
+{
+ ulong losum, hisum, mdsum, x;
+ ulong t1, t2;
+
+ losum = 0;
+ hisum = 0;
+ mdsum = 0;
+
+ /* x records that addr was odd, so a leading byte was taken separately */
+ x = 0;
+ if((uintptr)addr & 1) {
+ if(len) {
+ hisum += addr[0];
+ len--;
+ addr++;
+ }
+ x = 1;
+ }
+ /* addr is even from here on; add 16-bit words as loaded, 8 per iteration */
+ while(len >= 16) {
+ t1 = *(ushort*)(addr+0);
+ t2 = *(ushort*)(addr+2); mdsum += t1;
+ t1 = *(ushort*)(addr+4); mdsum += t2;
+ t2 = *(ushort*)(addr+6); mdsum += t1;
+ t1 = *(ushort*)(addr+8); mdsum += t2;
+ t2 = *(ushort*)(addr+10); mdsum += t1;
+ t1 = *(ushort*)(addr+12); mdsum += t2;
+ t2 = *(ushort*)(addr+14); mdsum += t1;
+ mdsum += t2;
+ len -= 16;
+ addr += 16;
+ }
+ while(len >= 2) {
+ mdsum += *(ushort*)addr;
+ len -= 2;
+ addr += 2;
+ }
+ /*
+  * add a trailing odd byte, then put the word sum on the low or
+  * high side depending on byte order and on whether the start was odd
+  */
+ if(x) {
+ if(len)
+ losum += addr[0];
+ if(LITTLE)
+ losum += mdsum;
+ else
+ hisum += mdsum;
+ } else {
+ if(len)
+ hisum += addr[0];
+ if(LITTLE)
+ hisum += mdsum;
+ else
+ losum += mdsum;
+ }
+
+ /* add hisum into losum with its two low bytes exchanged */
+ losum += hisum >> 8;
+ losum += (hisum & 0xff) << 8;
+ /* fold anything above 16 bits back into the low 16 bits */
+ while(hisum = losum>>16)
+ losum = hisum + (losum & 0xffff);
+
+ return losum & 0xffff;
+}
--- /dev/null
+++ b/libip/ptclbsum386.s
@@ -1,0 +1,126 @@
+/*
+ * ptclbsum(addr, len): 386 assembly version.
+ * SI walks the buffer, CX holds the bytes left, AX accumulates the sum.
+ */
+TEXT ptclbsum(SB), $0
+ MOVL addr+0(FP), SI
+ MOVL len+4(FP), CX
+
+ XORL AX, AX /* sum */
+
+ TESTL $1, SI /* byte aligned? */
+ MOVL SI, DI
+ JEQ _2align
+
+ /* odd start address: take one byte into the high half of AX's low word */
+ DECL CX
+ JLT _return
+
+ MOVB 0x00(SI), AH
+ INCL SI
+
+_2align:
+ TESTL $2, SI /* word aligned? */
+ JEQ _32loop
+
+ CMPL CX, $2 /* less than 2 bytes? */
+ JLT _1dreg
+ SUBL $2, CX
+
+ /* add one 16-bit word so that SI becomes a multiple of 4 */
+ XORL BX, BX
+ MOVW 0x00(SI), BX
+ ADDL BX, AX
+ ADCL $0, AX
+ LEAL 2(SI), SI
+
+_32loop:
+ CMPL CX, $0x20
+ JLT _8loop
+
+ /* BP = number of 32-byte chunks, CX = bytes left after them */
+ MOVL CX, BP
+ SHRL $5, BP
+ ANDL $0x1F, CX
+
+_32loopx:
+ /* eight 32-bit loads per pass, each added with carry */
+ MOVL 0x00(SI), BX
+ MOVL 0x1C(SI), DX
+ ADCL BX, AX
+ MOVL 0x04(SI), BX
+ ADCL DX, AX
+ MOVL 0x10(SI), DX
+ ADCL BX, AX
+ MOVL 0x08(SI), BX
+ ADCL DX, AX
+ MOVL 0x14(SI), DX
+ ADCL BX, AX
+ MOVL 0x0C(SI), BX
+ ADCL DX, AX
+ MOVL 0x18(SI), DX
+ ADCL BX, AX
+ LEAL 0x20(SI), SI
+ ADCL DX, AX
+
+ DECL BP
+ JNE _32loopx
+
+ /* add in the carry left by the last ADCL */
+ ADCL $0, AX
+
+_8loop:
+ CMPL CX, $0x08
+ JLT _2loop
+
+ /* BP = number of 8-byte chunks, CX = bytes left after them */
+ MOVL CX, BP
+ SHRL $3, BP
+ ANDL $0x07, CX
+
+_8loopx:
+ MOVL 0x00(SI), BX
+ ADCL BX, AX
+ MOVL 0x04(SI), DX
+ ADCL DX, AX
+
+ LEAL 0x08(SI), SI
+ DECL BP
+ JNE _8loopx
+
+ ADCL $0, AX
+
+_2loop:
+ CMPL CX, $0x02
+ JLT _1dreg
+
+ /* BP = number of 16-bit words, CX = 0 or 1 byte left */
+ MOVL CX, BP
+ SHRL $1, BP
+ ANDL $0x01, CX
+
+_2loopx:
+ MOVWLZX 0x00(SI), BX
+ ADCL BX, AX
+
+ LEAL 0x02(SI), SI
+ DECL BP
+ JNE _2loopx
+
+ ADCL $0, AX
+
+_1dreg:
+ TESTL $1, CX /* 1 byte left? */
+ JEQ _fold
+
+ XORL BX, BX
+ MOVB 0x00(SI), BX
+ ADDL BX, AX
+ ADCL $0, AX
+
+_fold:
+ /* add the upper 16 bits of AX into the lower 16 until the upper half is zero */
+ MOVL AX, BX
+ SHRL $16, BX
+ JEQ _swab
+
+ ANDL $0xFFFF, AX
+ ADDL BX, AX
+ JMP _fold
+
+_swab:
+ /* exchange the two result bytes unless the original address was odd */
+ TESTL $1, addr+0(FP)
+ /*TESTL $1, DI*/
+ JNE _return
+ XCHGB AH, AL
+
+_return:
+ RET
--- /dev/null
+++ b/libip/readipifc.c
@@ -1,0 +1,197 @@
+#include <u.h>
+#include <libc.h>
+#include <ip.h>
+
+/*
+ * Parse an interface status in the old format: buf is tokenized
+ * into a device name, an mtu and then groups of seven fields
+ * (ip, mask, net, pktin, pktout, errin, errout), one group per address.
+ * A new Ipifc is stored at *l.  Returns where the next interface
+ * should be linked: &ifc->next, or l itself if allocation failed.
+ */
+static Ipifc**
+_readoldipifc(char *buf, Ipifc **l, int index)
+{
+	char *f[200];
+	int i, n;
+	Ipifc *ifc;
+	Iplifc *lifc, **ll;
+
+	/* allocate new interface */
+	*l = ifc = mallocz(sizeof(Ipifc), 1);
+	if(ifc == nil)
+		return l;
+	l = &ifc->next;
+	ifc->index = index;
+
+	n = tokenize(buf, f, nelem(f));
+	if(n < 2)
+		return l;
+
+	strncpy(ifc->dev, f[0], sizeof ifc->dev);
+	ifc->dev[sizeof(ifc->dev) - 1] = 0;	/* strncpy may not terminate */
+	ifc->mtu = strtoul(f[1], nil, 10);
+
+	ll = &ifc->lifc;
+	for(i = 2; n-i >= 7; i += 7){
+		/* allocate new local address */
+		*ll = lifc = mallocz(sizeof(Iplifc), 1);
+		if(lifc == nil)
+			break;	/* out of memory: keep the addresses read so far */
+		ll = &lifc->next;
+		parseipandmask(lifc->ip, lifc->mask, f[i], f[i+1]);
+		parseip(lifc->net, f[i+2]);
+		/* the counters are per interface; the last group wins */
+		ifc->pktin = strtoul(f[i+3], nil, 10);
+		ifc->pktout = strtoul(f[i+4], nil, 10);
+		ifc->errin = strtoul(f[i+5], nil, 10);
+		ifc->errout = strtoul(f[i+6], nil, 10);
+	}
+	return l;
+}
+
+/*
+ * Look for name among the n tokens in f and return the token that
+ * follows it, or "" if name is absent or is the last token.
+ */
+static char*
+findfield(char *name, char **f, int n)
+{
+	int k;
+
+	k = 0;
+	while(k+1 < n){
+		if(strcmp(f[k], name) == 0)
+			return f[k+1];
+		k++;
+	}
+	return "";
+}
+
+/*
+ * Read one interface status file and append the interface to the
+ * list: a new Ipifc is stored at *l.  Returns where the next
+ * interface should be linked.  If the file cannot be opened, has no
+ * device, or memory runs out, nothing is appended and the returned
+ * slot is the one passed in.  Files that do not start with "device"
+ * are handed to _readoldipifc.
+ */
+static Ipifc**
+_readipifc(char *file, Ipifc **l, int index)
+{
+	int i, n, fd, lines;
+	char buf[4*1024];
+	char *line[32];
+	char *f[64];
+	Ipifc *ifc, **l0;
+	Iplifc *lifc, **ll;
+
+	/* read the file */
+	fd = open(file, OREAD);
+	if(fd < 0)
+		return l;
+	n = 0;
+	while((i = read(fd, buf+n, sizeof(buf)-1-n)) > 0 && n < sizeof(buf) - 1)
+		n += i;
+	buf[n] = 0;
+	close(fd);
+
+	if(strncmp(buf, "device", 6) != 0)
+		return _readoldipifc(buf, l, index);
+	/* ignore ifcs with no associated device */
+	if(strncmp(buf+6, " ", 2) == 0)
+		return l;
+	/* allocate new interface */
+	*l = ifc = mallocz(sizeof(Ipifc), 1);
+	if(ifc == nil)
+		return l;
+	l0 = l;
+	l = &ifc->next;
+	ifc->index = index;
+
+	lines = getfields(buf, line, nelem(line), 1, "\n");
+
+	/* pick off device specific info(first line) */
+	n = tokenize(line[0], f, nelem(f));
+	if(n%2 != 0)
+		goto lose;	/* not a list of name/value pairs */
+	strncpy(ifc->dev, findfield("device", f, n), sizeof(ifc->dev));
+	ifc->dev[sizeof(ifc->dev)-1] = 0;
+	if(ifc->dev[0] == 0)
+		goto lose;
+
+	ifc->mtu = strtoul(findfield("maxtu", f, n), nil, 10);
+	ifc->sendra6 = atoi(findfield("sendra", f, n));
+	ifc->recvra6 = atoi(findfield("recvra", f, n));
+	ifc->rp.mflag = atoi(findfield("mflag", f, n));
+	ifc->rp.oflag = atoi(findfield("oflag", f, n));
+	ifc->rp.maxraint = atoi(findfield("maxraint", f, n));
+	ifc->rp.minraint = atoi(findfield("minraint", f, n));
+	ifc->rp.linkmtu = atoi(findfield("linkmtu", f, n));
+	ifc->rp.reachtime = atoi(findfield("reachtime", f, n));
+	ifc->rp.rxmitra = atoi(findfield("rxmitra", f, n));
+	ifc->rp.ttl = atoi(findfield("ttl", f, n));
+	ifc->rp.routerlt = atoi(findfield("routerlt", f, n));
+	ifc->pktin = strtoul(findfield("pktin", f, n), nil, 10);
+	ifc->pktout = strtoul(findfield("pktout", f, n), nil, 10);
+	ifc->errin = strtoul(findfield("errin", f, n), nil, 10);
+	ifc->errout = strtoul(findfield("errout", f, n), nil, 10);
+
+	/* now read the addresses */
+	ll = &ifc->lifc;
+	for(i = 1; i < lines; i++){
+		n = tokenize(line[i], f, nelem(f));
+		if(n < 5)
+			break;
+
+		/* allocate new local address */
+		*ll = lifc = mallocz(sizeof(Iplifc), 1);
+		if(lifc == nil)
+			break;	/* out of memory: keep the addresses read so far */
+		ll = &lifc->next;
+
+		parseipandmask(lifc->ip, lifc->mask, f[0], f[1]);
+		parseip(lifc->net, f[2]);
+
+		lifc->validlt = strtoul(f[3], nil, 10);
+		lifc->preflt = strtoul(f[4], nil, 10);
+	}
+
+	return l;
+
+lose:
+	/*
+	 * Drop the interface again.  The caller must get back the
+	 * original slot: l points into the ifc being freed.
+	 */
+	free(ifc);
+	*l0 = nil;
+	return l0;
+}
+
+/*
+ * Free a whole interface list together with the address list
+ * hanging off each interface.  A nil list is fine.
+ */
+static void
+_freeifc(Ipifc *ifc)
+{
+	Ipifc *nextifc;
+	Iplifc *addr, *nextaddr;
+
+	while(ifc != nil){
+		nextifc = ifc->next;
+		addr = ifc->lifc;
+		while(addr != nil){
+			nextaddr = addr->next;
+			free(addr);
+			addr = nextaddr;
+		}
+		free(ifc);
+		ifc = nextifc;
+	}
+}
+
+/*
+ * Build the list of ip interfaces found under net ("/net" if net
+ * is nil).  The list passed in as ifc is freed first, so a caller
+ * can hand back the previous result.  With index >= 0 only that
+ * interface is read; otherwise every entry of the ipifc directory
+ * except "clone" and "stats" is.  Returns the new list, or nil if
+ * the directory cannot be opened or nothing was read.
+ */
+Ipifc*
+readipifc(char *net, Ipifc *ifc, int index)
+{
+	int fd, k, nent;
+	Dir *dir;
+	char directory[128];
+	char buf[128];
+	Ipifc **tail;
+
+	_freeifc(ifc);
+
+	/* from here on ifc is the head of the new list */
+	ifc = nil;
+	tail = &ifc;
+
+	if(net == 0)
+		net = "/net";
+	snprint(directory, sizeof(directory), "%s/ipifc", net);
+
+	if(index >= 0){
+		snprint(buf, sizeof(buf), "%s/%d/status", directory, index);
+		_readipifc(buf, tail, index);
+		return ifc;
+	}
+
+	fd = open(directory, OREAD);
+	if(fd < 0)
+		return nil;
+	nent = dirreadall(fd, &dir);
+	close(fd);
+
+	for(k = 0; k < nent; k++){
+		if(strcmp(dir[k].name, "clone") == 0 || strcmp(dir[k].name, "stats") == 0)
+			continue;
+		snprint(buf, sizeof(buf), "%s/%s/status", directory, dir[k].name);
+		tail = _readipifc(buf, tail, atoi(dir[k].name));
+	}
+	free(dir);
+
+	return ifc;
+}
--- /dev/null
+++ b/libip/testreadipifc.c
@@ -1,0 +1,21 @@
+#include "u.h"
+#include "kern.h"
+#include "ip.h"
+
+/*
+ * Print every interface found under /net with its device name and
+ * mtu, followed by one line per address: ip, mask and network.
+ */
+void
+main(void)
+{
+	Ipifc *ifc, *list;
+	Iplifc *lifc;
+
+	/* %I and %M are both handled by eipfmt */
+	fmtinstall('I', eipfmt);
+	fmtinstall('M', eipfmt);
+
+	list = readipifc("/net", nil, -1);
+	for(ifc = list; ifc; ifc = ifc->next){
+		print("ipifc %s %d\n", ifc->dev, ifc->mtu);
+		for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+			print("\t%I %M %I\n", lifc->ip, lifc->mask, lifc->net);
+	}
+}
--- a/man/3/ip
+++ b/man/3/ip
@@ -1,111 +1,620 @@
.TH IP 3
.SH NAME
-ip \- network protocols over IP
+ip, esp, gre, icmp, icmpv6, ipmux, rudp, tcp, udp, il \- network protocols over IP
.SH SYNOPSIS
.nf
-.B bind -a #I\f1[\f5\f2ifn\f1]\f5 /net
-
+.2C
+.B bind -a #I\fIspec\fP /net
+.sp 0.3v
+.B /net/ipifc
+.B /net/ipifc/clone
+.B /net/ipifc/stats
+.BI /net/ipifc/ n
+.BI /net/ipifc/ n /status
+.BI /net/ipifc/ n /ctl
+\&...
+.sp 0.3v
.B /net/arp
.B /net/bootp
.B /net/iproute
.B /net/ipselftab
-.B /net/iprouter
.B /net/log
-
-.B /net/ipifc/clone
-.B /net/ipifc/stats
-.BI /net/ipifc/ n
-.BI /net/ipifc/ n /data
-.BI /net/ipifc/ n /ctl
-.BI /net/ipifc/ n /local
-.BI /net/ipifc/ n /status
-
-.BI /net/ proto /clone
-.BI /net/ proto /stats
-.BI /net/ proto / n
-.BI /net/ proto / n /ctl
-.BI /net/ proto / n /data
-.BI /net/ proto / n /err
-.BI /net/ proto / n /local
-.BI /net/ proto / n /remote
-.BI /net/ proto / n /status
-.BI /net/ proto / n /listen
+.B /net/ndb
+.sp 0.3v
+.B /net/esp
+.B /net/gre
+.B /net/icmp
+.B /net/icmpv6
+.B /net/ipmux
+.B /net/rudp
+.B /net/tcp
+.B /net/udp
+.B /net/il
+.sp 0.3v
+.B /net/tcp/clone
+.B /net/tcp/stats
+.BI /net/tcp/ n
+.BI /net/tcp/ n /data
+.BI /net/tcp/ n /ctl
+.BI /net/tcp/ n /local
+.BI /net/tcp/ n /remote
+.BI /net/tcp/ n /status
+.BI /net/tcp/ n /listen
\&...
+.1C
.fi
.SH DESCRIPTION
-The IP device serves a directory representing a self-contained
-collection of IP interfaces.
-There may be several instances, identified by the decimal interface number
-.IR ifn ,
-that follows the
-.B #I
-device name;
-.B #I0
-is assumed by default.
-Each instance
-has a disjoint collection of IP interfaces, routes and address resolution maps.
-A physical or virtual device, or
-.IR medium ,
-that produces IP packets is associated
-with a logical IP network using the mechanisms described under
-.I "Physical and logical interfaces"
-below.
-Commonly all IP media on a host are assigned to a single
-instance of
-.BR #I ,
-which is conventionally bound to
-.BR /net ,
-but other configurations are possible: interfaces might be assigned
-to different device instances forming separate
-logical IP networks
-to partition networks in firewall or
-gateway applications.
+The
+.I ip
+device provides the interface to Internet Protocol stacks.
+.I Spec
+is an integer starting from 0 identifying a stack.
+Each stack implements IPv4 and IPv6.
+Each stack is independent of all others:
+the only information transfer between them is via programs that
+mount multiple stacks.
+Normally a system uses only one stack.
+However multiple stacks can be used for debugging
+new IP networks or implementing firewalls or proxy
+services.
.PP
-Hosted Inferno provides a subset of the interface described here that gives
-to the TCP/IP and UDP/IP of the host system's own IP subsystem.
-See
-.IR "Hosted interfaces"
-below for a summary of the differences.
-.SS Protocols
-Within each instance,
-the IP device provides
-an interface to each IP protocol configured into the system, such as TCP/IP or UDP/IP.
+All addresses used are 16-byte IPv6 addresses.
+IPv4 addresses are a subset of the IPv6 addresses and both standard
+.SM ASCII
+formats are accepted.
+In binary representation, all v4 addresses start with the 12 bytes, in hex:
+.IP
+.EX
+00 00 00 00 00 00 00 00 00 00 ff ff
+.EE
+.
+.SS "Configuring interfaces
+Each stack may have multiple interfaces and each interface
+may have multiple addresses.
+The
+.B /net/ipifc
+directory contains a
+.B clone
+file, a
+.B stats
+file, and numbered subdirectories for each physical interface.
.PP
-Each of the protocols is served by the IP device, which represents a
-connection by a set of device files.
-The top level directory,
-.I proto
-in the
-.SM SYNOPSIS
-above,
-is named after a protocol (eg,
+Opening the
+.B clone
+file reserves an interface.
+The file descriptor returned from the
+.IR open (2)
+will point to the control file,
+.BR ctl ,
+of the newly allocated interface.
+Reading
+.B ctl
+returns a text string representing the number of the interface.
+Writing
+.B ctl
+alters aspects of the interface.
+The possible
+.I ctl
+messages are those described under
+.B "Protocol directories"
+below and these:
+.TF "\fLbind loopback\fR"
+.PD
+.
+.\" from devip.c
+.
+.TP
+.BI "bind ether " path
+Treat the device mounted at
+.I path
+as an Ethernet medium carrying IP and ARP packets
+and associate it with this interface.
+The kernel will
+.IR dial (2)
+.IR path !0x800,
+.IR path !0x86DD
+and
+.IR path !0x806
+and use the three connections for IPv4, IPv6 and
+ARP respectively.
+.TP
+.B "bind pkt
+Treat this interface as a packet interface. Assume
+a user program will read and write the
+.I data
+file to receive and transmit IP packets to the kernel.
+This is used by programs such as
+.IR ppp (8)
+to mediate IP packet transfer between the kernel and
+a PPP encoded device.
+.TP
+.BI "bind netdev " path
+Treat this interface as a packet interface.
+The kernel will open
+.I path
+and read and write the resulting file descriptor
+to receive and transmit IP packets.
+.TP
+.BI "bind loopback "
+Treat this interface as a local loopback. Anything
+written to it will be looped back.
+.
+.\" from ipifc.c
+.
+.TP
+.B "unbind
+Disassociate the physical device from an IP interface.
+.TP
+.BI add\ "local mask remote mtu " proxy
+.PD 0
+.TP
+.BI try\ "local mask remote mtu " proxy
+.PD
+Add a local IP address to the interface.
+.I Try
+adds the
+.I local
+address as a tentative address
+if it's an IPv6 address.
+The
+.IR mask ,
+.IR remote ,
+.IR mtu ,
+and
+.B proxy
+arguments are all optional.
+The default
+.I mask
+is the class mask for the local address.
+The default
+.I remote
+address is
+.I local
+ANDed with
+.IR mask .
+The default
+.I mtu
+(maximum transmission unit)
+is 1514 for Ethernet and 4096 for packet media.
+The
+.I mtu
+is the size in bytes of the largest packet that this interface can send.
+.IR Proxy ,
+if specified, means that this machine should answer
+ARP requests for the remote address.
+.IR Ppp (8)
+does this to make remote machines appear
+to be connected to the local Ethernet.
+Adding the special null-address
+.B "0.0.0.0"
+or
+.B "::"
+in
+.I local
+to an interface makes the ip stack accept all incoming
+connections regardless of the destination IP address.
+This is used temporarily by
+.IR ipconfig (8)
+to accept DHCP answers when no IP address
+has been assigned yet. This can also be used
+to implement a NAT gateway by accepting all
+incoming connections and proxying them with
+.IR trampoline (8)
+to a different ip stack.
+.TP
+.BI remove\ "local mask"
+Remove a local IP address from an interface.
+.TP
+.BI mtu\ n
+Set the maximum transmission unit for this device to
+.IR n .
+The mtu is the maximum size of the packet including any
+medium-specific headers.
+.TP
+.BI speed\ n
+Set the maximum transmit speed in bits per second.
+.TP
+.BI delay\ n
+Set the maximum burst delay in milliseconds. (Default is 40ms)
+When
+.B speed
+has been set and packets in flight exceed the maximum burst
+delay then packets sent on the interface are discarded until
+the load drops below the maximum.
+.TP
+.BI iprouting\ n
+Allow
+.RI ( n
+is missing or non-zero) or disallow
+.RI ( n
+is 0) forwarding packets between this interface and others.
+.TP
+.BI reflect\ n
+When forwarding, allow packets from this interface to be
+echoed back on the same interface.
+.TP
+.BI reassemble\ n
+Reassemble IP fragments before forwarding to this interface.
+.
+.\" remainder from netif.c (thus called from devether.c),
+.\" except add6 and ra6 from ipifc.c
+.
+.TP
+.B bridge
+Enable bridging (see
+.IR bridge (3)).
+.TP
+.B promiscuous
+Set the interface into promiscuous mode,
+which makes it accept all incoming packets,
+whether addressed to it or not.
+.TP
+.BI "connect " type
+marks the Ethernet packet
+.I type
+as being in use, if not already in use
+on this interface.
+A
+.I type
+of -1 means `all' but appears to be a no-op.
+.TP
+.BI addmulti\ Media-addr
+Treat the multicast
+.I Media-addr
+on this interface as a local address.
+.TP
+.BI remmulti\ Media-addr
+Remove the multicast address
+.I Media-addr
+from this interface.
+.TP
+.B scanbs
+Make the wireless interface scan for base stations.
+.TP
+.B headersonly
+Set the interface to pass only packet headers, not data too.
+.
+.\" remainder from ipifc.c; tedious, so put them last
+.
+.TP
+.BI "add6 " "v6addr pfx-len [onlink auto validlt preflt]"
+Add the local IPv6 address
+.I v6addr
+with prefix length
+.I pfx-len
+to this interface.
+See RFC 2461 §6.2.1 for more detail.
+The remaining arguments are optional:
+.RS
+.TF "\fIonlink\fR"
+.TP
+.I onlink
+flag: address is `on-link'
+.TP
+.I auto
+flag: autonomous
+.TP
+.I validlt
+valid life-time in seconds
+.TP
+.I preflt
+preferred life-time in seconds
+.RE
+.PD
+.TP
+.B remove6
+Remove local IPv6 addresses that have expired their
+valid life-time.
+.TP
+.BI "ra6 " "keyword value ..."
+Set IPv6 router advertisement (RA) parameter
+.IR keyword 's
+.IR value .
+Known
+.IR keyword s
+and the meanings of their values follow.
+See RFC 2461 §6.2.1 for more detail.
+Flags are true iff non-zero.
+.RS
+.TF "\fLreachtime\fR"
+.TP
+.B recvra
+flag: receive and process RAs.
+.TP
+.B sendra
+flag: generate and send RAs.
+.TP
+.B mflag
+flag: ``Managed address configuration'',
+goes into RAs.
+.TP
+.B oflag
+flag: ``Other stateful configuration'',
+goes into RAs.
+.TP
+.B maxraint
+``maximum time allowed between sending unsolicited multicast''
+RAs from the interface, in ms.
+.TP
+.B minraint
+``minimum time allowed between sending unsolicited multicast''
+RAs from the interface, in ms.
+.TP
+.B linkmtu
+``value to be placed in MTU options sent by the router.''
+Zero indicates none.
+.TP
+.B reachtime
+sets the Reachable Time field in RAs sent by the router.
+``Zero means unspecified (by this router).''
+.TP
+.B rxmitra
+sets the Retrans Timer field in RAs sent by the router.
+``Zero means unspecified (by this router).''
+.TP
+.B ttl
+default value of the Cur Hop Limit field in RAs sent by the router.
+Should be set to the ``current diameter of the Internet.''
+``Zero means unspecified (by this router).''
+.TP
+.B routerlt
+sets the Router Lifetime field of RAs sent from the interface, in ms.
+Zero means the router is not to be used as a default router.
+.PD
+.RE
+.PP
+Reading the interface's
+.I status
+file returns information about the interface. The first line
+is composed of white-space-separated fields, the first two
+fields are: device and maxmtu. Subsequent lines list the
+ip addresses assigned to that interface. The columns are:
+ip address, network mask, network address and valid/preferred
+life times in milliseconds. See
+.I readipifc
+in
+.IR ip (2).
+.
+.SS "Routing
+The file
+.I iproute
+controls information about IP routing.
+When read, it returns one line per routing entry.
+Each line contains eight white-space-separated fields:
+target address, target mask, address of next hop, flags,
+tag, interface number, source address, source mask.
+The entry used for routing an IP packet is the one with
+the longest destination and source mask for which
+destination address ANDed with target mask equals the
+target and also the source ANDed with the source mask equals
+the source address.
+The one-character flags are:
+.TF m
+.TP
+.B 4
+IPv4 route
+.TP
+.B 6
+IPv6 route
+.TP
+.B i
+local interface
+.TP
+.B b
+broadcast address
+.TP
+.B u
+local unicast address
+.TP
+.B m
+multicast route
+.TP
+.B p
+point-to-point route
+.PD
+.PP
+The tag is an arbitrary, up to 4 character, string. It is normally used to
+indicate what routing protocol originated the route.
+.PP
+Writing to
+.B /net/iproute
+changes the route table. The messages are:
+.TF "\fLtag \fIstring\fR"
+.PD
+.TP
+.BI flush\ tag
+Remove routes of the specified tag, or all routes if
+.I tag
+is omitted.
+.TP
+.BI tag\ string
+Associate the tag,
+.IR string ,
+with all subsequent routes added via this file descriptor.
+.TP
+.BI add\ "target mask nexthop"
+.TP
+.BI add\ "target mask nexthop interface"
+.TP
+.BI add\ "target mask nexthop source smask"
+.TP
+.BI add\ "target mask nexthop interface source smask"
+.TP
+.BI add\ "target mask nexthop tag interface source smask"
+.TP
+.BI add\ "target mask nexthop type tag interface source smask"
+Add the route to the table. If one already exists with the
+same target and mask, replace it. The
+.I interface
+can be given as either the interface number or a local
+IP address on the desired interface.
+.TP
+.BI remove\ "target mask"
+.TP
+.BI remove\ "target mask nexthop"
+.TP
+.BI remove\ "target mask source smask"
+.TP
+.BI remove\ "target mask nexthop source smask"
+.TP
+.BI remove\ "target mask nexthop interface source smask"
+.TP
+.BI remove\ "target mask nexthop tag interface source smask"
+.TP
+.BI remove\ "target mask nexthop type tag interface source smask"
+Remove the matching route.
+.
+.SS "Address resolution
+The file
+.B /net/arp
+controls information about address resolution.
+The kernel automatically updates the v4 ARP and v6 Neighbour Discovery
+information for Ethernet interfaces.
+When read, the file returns one line per address containing the
+type of medium, the status of the entry (OK, WAIT), the IP
+address, the medium address and the IP address of the interface
+where the entry is valid.
+Writing to
+.B /net/arp
+administers the ARP information.
+The control messages are:
+.TF "\fLdel \fIIP-addr\fR"
+.PD
+.TP
+.B flush
+Remove all entries.
+.TP
+.BI add\ "type IP-addr Media-addr Interface-IP-addr"
+Add an entry or replace an existing one for the
+same IP address. The optional interface IP address specifies the
+interface where the ARP entry will be valid. This is needed
+for IPv6 link local addresses.
+.TP
+.BI del\ "IP-addr"
+Delete an individual entry.
+.PP
+ARP entries do not time out. The ARP table is a
+cache with an LRU replacement policy. The IP stack
+listens for all ARP requests and, if the requester is in
+the table, the entry is updated.
+Also, whenever a new address is configured onto an
+Ethernet, an ARP request is sent to help
+update the table on other systems.
+.PP
+Currently, the only medium type is
+.BR ether .
+.br
+.ne 3
+.
+.SS "Debugging and stack information
+If any process is holding
+.B /net/log
+open, the IP stack queues debugging information to it.
+This is intended primarily for debugging the IP stack.
+The information provided is implementation-defined;
+see the source for details. Generally, what is returned is error messages
+about bad packets.
+.PP
+Writing to
+.B /net/log
+controls debugging. The control messages are:
+.TF "\fLclear \fIarglist\fR"
+.PD
+.TP
+.BI set\ arglist
+.I Arglist
+is a space-separated list of items for which to enable debugging.
+The possible items are:
+.BR ppp ,
+.BR ip ,
+.BR fs ,
.BR tcp ,
.BR il ,
-.BR udp )
-and contains a
+.BR icmp ,
+.BR udp ,
+.BR compress ,
+.BR ilmsg ,
+.BR gre ,
+.BR tcpwin ,
+.BR tcprxmt ,
+.BR udpmsg ,
+.BR ipmsg ,
+and
+.BR esp .
+.TP
+.BI clear\ arglist
+.I Arglist
+is a space-separated list of items for which to disable debugging.
+.TP
+.BI only\ addr
+If
+.I addr
+is non-zero, restrict debugging to only those
+packets whose source or destination is that
+address.
+.PP
+The file
+.B /net/ndb
+can be read or written by
+programs. It is normally used by
+.IR ipconfig (8)
+to leave configuration information for other programs
+such as
+.B dns
+and
+.B cs
+(see
+.IR ndb (8)).
+.B /net/ndb
+may contain up to 1024 bytes.
+.PP
+The file
+.B /net/ipselftab
+is a read-only file containing all the IP addresses
+considered local. Each line in the file contains
+three white-space-separated fields: IP address, usage count,
+and flags. The usage count is the number of interfaces to which
+the address applies. The flags are the same as for routing
+entries.
+.br
+.ne 3
+.
+.SS "Protocol directories
+The
+.I ip
+device
+supports IP as well as several protocols that run over it:
+TCP, UDP, RUDP, ICMP, IL, GRE, and ESP.
+TCP and UDP provide the standard Internet
+protocols for reliable stream and unreliable datagram
+communication.
+RUDP is a locally-developed reliable datagram protocol based on UDP.
+ICMP is IP's catch-all control protocol used to send
+low level error messages and to implement
+.IR ping (8).
+GRE is a general encapsulation protocol.
+ESP is the encapsulation protocol for IPsec.
+IL provides a reliable datagram service for communication
+between Plan 9 machines but is now deprecated.
+.PP
+Each protocol is a subdirectory of the IP stack.
+The top level directory of each protocol contains a
.B clone
file, a
.B stats
-file,
-and subdirectories numbered from zero to the number of connections
-configured for this protocol.
+file, and subdirectories numbered from zero to the number of connections
+opened for this protocol.
.PP
-The read-only
-.B stats
-file contains protocol-specific statistics as one or more lines of text.
-There is no particular format, but the values are often a superset
-of those required by the SNMP MIB.
-.PP
Opening the
.B clone
-file reserves a connection, represented by
-one of the numbered subdirectories. The resulting file descriptor
-will be open on the control file,
+file reserves a connection. The file descriptor returned from the
+.IR open (2)
+will point to the control file,
.BR ctl ,
of the newly allocated connection.
-Reading the
+Reading
.B ctl
-file returns a text
+returns a text
string representing the number of the
connection.
Connections may be used either to listen for incoming calls
@@ -115,99 +624,121 @@
.B ctl
file.
After a connection has been established data may be read from
-and written to the data file.
+and written to
+.BR data .
+A connection can be actively established using the
+.B connect
+message (see also
+.IR dial (2)).
+A connection can be established passively by first
+using an
+.B announce
+message (see
+.IR dial (2))
+to bind to a local port and then
+opening the
+.B listen
+file (see
+.IR dial (2))
+to receive incoming calls.
.PP
-Before sending data, remote and local addresses must be set for the connection.
-For outgoing calls the local port number will be allocated randomly if none is set.
-Addresses are set by writing control messages to the
-.B ctl
-file of the connection.
-The connection is not established until the data file is opened.
-There are two models depending on the nature of the protocol.
-For connection-oriented protocols, the process will block on open
-until the remote host has acknowledged the connection,
-either accepting it, causing a successful return from open,
-or rejecting it, causing open to return an appropriate error.
-For connectionless protocols, the open always succeeds;
-the `connect' request sets local parameters for the source and destination fields
-for use by subsequent read and write requests.
-.PP
-The following control messages are provided by this interface
-to all protocols.
-A particular protocol can provide additional commands, or
-change the interpretation or even syntax of those below,
-as described in the manual page for that protocol.
-The description below shows
-the standard commands with the default argument syntax and interpretation:
+The following control messages are supported:
+.TF "\fLremmulti \fIip\fR"
+.PD
.TP
-.BI connect\ ipaddress ! port "[!r]\ [\f2lport\f5]"
-Set the remote IP address and port number for the connection.
-If the
-.B r
-flag
-is supplied and the optional local port
-.I lport
-has not been specified the system will allocate
-a restricted port number (between 600 and 1024) for the connection to allow communication
-with Unix machines'
+.BI connect\ ip-address ! port "!r " local
+Establish a connection to the remote
+.I ip-address
+and
+.IR port .
+If
+.I local
+is specified, it is used as the local port number.
+If
+.I local
+is not specified but
+.B !r
+is, the system will allocate
+a restricted port number (less than 1024) for the connection to allow communication
+with Unix
.B login
and
.B exec
services.
+Otherwise a free port number starting at 5000 is chosen.
+The connect fails if the combination of local and remote address/port pairs
+is already assigned to another port.
.TP
-.BI "announce\ [" ipaddress !] port
-Set the local port
-number to
-.I port
-and accept calls to that port.
-.I Port
+.BI announce\ X
+.I X
is a decimal port number or
.LR * .
+Set the local port
+number to
+.I X
+and accept calls to
+.IR X .
If
-.I port
-is zero, assign a port number
-(the one assigned can be read from the
-.B local
-address file).
-If
-.I port
+.I X
is
.LR * ,
accept
calls for any port that no process has explicitly announced.
-If the optional
-.I ipaddress
-is given, set the local IP address for the connection
-to that address, and accept only those incoming calls to
-.I port
-that are addressed to
-.IR ipaddress .
+The local IP address cannot be set.
.B Announce
fails if the connection is already announced or connected.
.TP
-.BI bind\ port
-.I Port
+.BI bind\ X
+.I X
is a decimal port number or
.LR * .
Set the local port number to
-.IR port .
-This request exists to support emulation of
-of BSD sockets and is otherwise neither needed nor used in Inferno.
+.IR X .
+This exists to support emulation
+of BSD sockets by the APE libraries (see
+.IR pcc (1))
+and is not otherwise used.
+.\" this is gone
+.\" .TP
+.\" .BI backlog\ n
+.\" Set the maximum number of unanswered (queued) incoming
+.\" connections to an announced port to
+.\" .IR n .
+.\" By default
+.\" .I n
+.\" is set to five. If more than
+.\" .I n
+.\" connections are pending,
+.\" further requests for a service will be rejected.
.TP
-.BI tos " \f1[\f2 n \f1]\f2"
-Set the type-of-service value in outgooing packets to
-.I n
-(default: 0).
+.BI ttl\ n
+Set the time to live IP field in outgoing packets to
+.IR n .
.TP
-.BI ttl " \f1[\f2 n \f1]\f2"
-Set the time-to-live (TTL) value in packets transmitted on this conversation
-to
-.I n
-(default: 255).
+.BI tos\ n
+Set the service type IP field in outgoing packets to
+.IR n .
+.TP
+.B ignoreadvice
+Don't break (UDP) connections because of ICMP errors.
+.TP
+.BI addmulti\ "ifc-ip [ mcast-ip ]"
+Treat
+.I ifc-ip
+on this multicast interface as a local address.
+If
+.I mcast-ip
+is present,
+use it as the interface's multicast address.
+.TP
+.BI remmulti\ ip
+Remove the address
+.I ip
+from this multicast interface.
.PP
Port numbers must be in the range 1 to 32767.
.PP
-Several read-only files report the status of a
+Several files report the status of a
connection.
The
.B remote
@@ -214,714 +745,633 @@
and
.B local
files contain the IP address and port number for the remote and local side of the
-connection.
-The
+connection. The
.B status
file contains protocol-dependent information to help debug network connections.
-The first word on the first line gives the status of the
-connection.
+On receiving an error or EOF reading or writing the
+.B data
+file, the
+.B err
+file contains the reason for error.
.PP
-Having announced, a process may accept incoming connections by calling
-.B open
-on the
+A process may accept incoming connections by
+.IR open (2)ing
+the
.B listen
file.
The
.B open
-will block until a new connection request arrives;
-it will then
-return an open file descriptor that points to the control file of the
+will block until a new connection request arrives.
+Then
+.B open
+will return an open file descriptor which points to the control file of the
newly accepted connection.
-Repeating this procedure will accept all calls for the
+This procedure will accept all calls for the
given protocol.
-.PP
-In general it should not be necessary to use the file system interface to the
-networks.
-The
-.BR dial ,
-.BR announce ,
-and
-.BR listen
-functions described in
-.IR dial (2)
-perform the necessary I/O to establish and
-manipulate network connections.
-.SS TCP protocol
-The TCP protocol is the standard Internet
-protocol for reliable stream communication; it does not preserve
-read/write
-boundaries.
-.PP
-A connection is controlled by writing text strings to the associated
+See
+.IR dial (2).
+.
+.SS TCP
+TCP connections are reliable point-to-point byte streams; there are no
+message delimiters.
+A connection is determined by the address and port numbers of the two
+ends.
+TCP
.B ctl
-file.
-After a connection has been established data may be read from
-and written to the data file.
-The TCP protocol provides a stream connection that does not preserve
-read/write
-boundaries.
-.PP
-For outgoing calls the local port number will be allocated randomly if none is set.
-Addresses are set by writing control messages to the
-.B ctl
-file of the connection.
-The connection is not established until the data file is opened.
-For TCP the
-process will block until the remote host has acknowledged the connection.
-.PP
-As well as the standard control messages above,
-TCP accepts the following:
+files support the following additional messages:
+.TF "\fLkeepalive\fI n\fR"
+.PD
.TP
-.BI hangup
-Send a TCP reset (RST) to the remote side and end the conversation,
-without waiting for untransmitted data to be acknowledged,
-unlike a normal close of the device.
+.B hangup
+close down this TCP connection
.TP
-.BI keepalive\ [ "n" ]
-Enable `keep alive'
-mode:
-if no traffic crosses the link within a given period, send a
-packet to check that the remote party is still there, and remind
-it that the local connection is still live.
-The optional value
-.I n
-gives the keep-alive time in milliseconds (default: 120000).
-.PP
-The
-.B status
-file has many lines, each containing a labelled number, giving the values
-of parameters and statistics such as:
-maximum allowed connections, outgoing calls, incoming calls, established but later reset,
-active calls, input segments, output segments, retransmitted segments, retransmitted timeouts,
-input errors, transmitted reset.
-.SS UDP protocol
-.PP
-UDP provides the standard Internet
-protocol for unreliable datagram
-communication.
-.PP
-UDP opens always succeed.
-Before sending data, remote and local addresses must be set for the connection.
-Alternatively, the following special control requests can be used:
+.B close
+graceful hangup
.TP
-.B headers
-Set the connection to use an address header with IPv6 addressing
-on reads and writes of the data file,
-allowing a single connection to send datagrams to converse with
-many different destination addresses and ports.
-The 52 byte binary header appears before the data
-read or written.
-It contains: remote IP address, local IP address, interface IP address, remote port, and local port.
-The IP addresses are 16 bytes each in IPv6 format, and
-the port addresses are 2 bytes each, all written in network (big-endian) order.
-On reads, the header gives the values from the incoming datagram,
-except that if the remote used a multicast destination address, the IP address
-of the receiving interface is substituted.
-On writes, the header provides the destination for the resulting datagram,
-and if the local IP address corresponds to a valid local unicast interface,
-that address is used, otherwise the IP address of the transmitting interface
-is substituted.
+.BI keepalive \ n
+turn on keep alive messages.
+.IR N ,
+if given, is the milliseconds between keepalives
+(default 30000).
.TP
-.B headers4
-Set the connection to use an address header with IPv4 addresses
-on reads and writes of the data file,
-allowing a single connection to send datagrams to converse with
-many different destination addresses and ports.
-The 12 byte binary header appears before the data
-read or written.
-It contains: remote IP address, local IP address, remote port, and local port.
-The IP addresses are 4 bytes each,
-the port addresses are 2 bytes each, all written in network (big-endian) order.
-On reads, the header gives the values from the incoming datagram.
-On writes, the header provides the destination for the resulting datagram.
-This mode is obsolete and destined for oblivion.
+.BI checksum \ n
+emit TCP checksums of zero if
+.I n
+is zero; otherwise, and by default,
+TCP checksums are computed and sent normally.
+.TP
+.BI tcpporthogdefense \ onoff
+.I onoff
+of
+.L on
+enables the TCP port-hog defense for all TCP connections;
+.I onoff
+of
+.L off
+disables it.
+The defense is a solution to hijacked systems staking out ports
+as a form of denial-of-service attack.
+To avoid stateless TCP conversation hogs,
+.I ip
+picks a TCP sequence number at random for keepalives.
+If that number gets acked by the other end,
+.I ip
+shuts down the connection.
+Some firewalls,
+notably ones that perform stateful inspection,
+discard such out-of-specification keepalives,
+so connections through such firewalls
+will be killed after five minutes
+by the lack of keepalives.
+.
+.SS UDP
+UDP connections carry unreliable and unordered datagrams. A read from
+.B data
+will return the next datagram, discarding anything
+that doesn't fit in the read buffer.
+A write is sent as a single datagram.
.PP
-A read of less than
-the size of the datagram will cause the entire datagram to be consumed.
-Each write to the data file will send a single datagram on the network.
+By default, a UDP connection is a point-to-point link.
+Either a
+.B connect
+establishes a local and remote address/port pair or
+after an
+.BR announce ,
+each datagram coming from a different remote address/port pair
+establishes a new incoming connection.
+However, many-to-one semantics is also possible.
.PP
-In replies, in connection-oriented mode, if the remote address
-has not been set, the first arriving packet sets the following
-based on the source of the incoming datagram:
-the remote address and port for the conversation,
-and the local address is set to the destination address in the
-datagram unless that is a multicast address, and then the address
-of the receiving interface is used.
+If, after an
+.BR announce ,
+the message
+.L headers
+is written to
+.BR ctl ,
+then all messages sent to the announced port
+are received on the announced connection prefixed
+with the corresponding structure,
+declared in
+.BR <ip.h> :
+.IP
+.EX
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+ uchar raddr[16]; /* V6 remote address */
+ uchar laddr[16]; /* V6 local address */
+ uchar ifcaddr[16]; /* V6 interface address (receive only) */
+ uchar rport[2]; /* remote port */
+ uchar lport[2]; /* local port */
+};
+.EE
.PP
-If a conversation is in
-.B headers
-mode, only the local port is relevant.
+Before a write, a user must prefix a similar structure to each message.
+The system overrides the user specified local port with the announced
+one. If the user specifies an address that isn't a unicast address in
+.BR /net/ipselftab ,
+that too is overridden.
+Since the prefixed structure is the same in read and write, it is relatively
+easy to write a server that responds to client requests by just copying new
+data into the message body and then writing back the same buffer that was
+read.
.PP
-Connection-oriented UDP is hungup if an ICMP error (eg, host or port unreachable,
-or time exceeded) arrives with matching port.
-.PP
-The
-.I udp
-.B status
-file contains four lines, each containing a labelled number counting an event:
-input datagrams, datagrams on unannounced ports, datagrams with wrong checksum, and output datagrams.
-.SS IL Protocol
-IL provides a reliable point-to-point datagram service for communication between Plan 9 and
-native Inferno machines.
-Each read and write transfers a single datagram, as for UDP.
-The datagrams are delivered reliably and in order.
-Conversations are addressed and established as for TCP.
-.SS Routing
+In this case (writing
+.L headers
+to the
+.I ctl
+file),
+no
+.I listen
+nor
+.I accept
+is needed;
+otherwise,
+the usual sequence of
+.IR announce ,
+.IR listen ,
+.I accept
+must be executed before performing I/O on the corresponding
+.I data
+file.
+.
+.SS RUDP
+RUDP is a reliable datagram protocol based on UDP,
+currently only for IPv4.
+Packets are delivered in order.
+RUDP does not support
+.BR listen .
+One must write either
+.L connect
+or
+.L announce
+followed immediately by
+.L headers
+to
+.BR ctl .
.PP
-The
-.B iproute
-file can be read and written.
-When read, it returns the contents of the IP routing tables,
-one line per entry,
-with six fields giving the
-destination host or network address, address mask,
-gateway address, route type, tag (see below), and the number of the
-.B ipifc
-interface owning the route
-(or
-.RB ` - '
-if none).
-The route type is up to four characters:
-.B 4
-or
-.B 6
-(IPv4 or IPv6 route);
-.B i
-(route is interface);
-one of
-.B u
-(unicast),
-.B b
-(broadcast),
-or
-.B m
-(multicast);
-and lastly
-.B p
-if the route is point-to-point.
+Unlike TCP, the reboot of one end of a connection does
+not force a closing of the connection. Communications will
+resume when the rebooted machine resumes talking. Any unacknowledged
+packets queued before the reboot will be lost. A reboot can
+be detected by reading the
+.B err
+file. It will contain the message
+.IP
+.BI hangup\ address ! port
.PP
-Commands can also be written to control the routing:
+where
+.I address
+and
+.I port
+are those of the far side of the connection.
+Retransmitting a datagram more than 10 times
+is treated like a reboot:
+all queued messages are dropped, an error is queued to the
+.B err
+file, and the conversation resumes.
+.PP
+RUDP
+.I ctl
+files accept the following messages:
+.TF "\fLranddrop \fI[ percent ]\fR"
.TP
-.BI add " ip mask gw \f1[\f2 tag \f1]\f2"
-Add a route via the gateway identified by IP address
-.I gw
-to the address specified by
-.I ip
-and subnet mask
-.IR mask .
-Tag the resulting table entry with the
-.I tag
-provided, or the current
-.I tag
-(see
-.B tag
-below),
-or the tag
-.BR none .
+.B headers
+Corresponds to the
+.L headers
+format of UDP.
.TP
-.BI flush " \f1[\f2 tag \f1]\f2"
-Remove all routes with the given
-.I tag
-that do not correspond to a local interface.
-If
-.I tag
-is not given, flush all routes.
+.BI "hangup " "IP port"
+Drop the connection to address
+.I IP
+and
+.IR port .
.TP
-.BI remove " ip mask"
-Remove routes to the given address.
-.TP
-.BI tag " tag"
-Tag the routes generated by writes on the current file descriptor with
-the given
-.IR tag
-of up to 4 characters.
-The default is
-.BR none ,
-set when
-.B iproute
-is opened.
+.BI "randdrop " "[ percent ]"
+Randomly drop
+.I percent
+of outgoing packets.
+Default is 10%.
+.
+.SS ICMP
+ICMP is a datagram protocol for IPv4 used to exchange control requests and
+their responses with other machines' IP implementations.
+ICMP is primarily a kernel-to-kernel protocol, but it is possible
+to generate `echo request' and read `echo reply' packets from user programs.
+.
+.SS ICMPV6
+ICMPv6 is the IPv6 equivalent of ICMP.
+If, after an
+.BR announce ,
+the message
+.L headers
+is written to
+.BR ctl ,
+then before a write,
+a user must prefix each message with a corresponding structure,
+declared in
+.BR <ip.h> :
+.IP
+.EX
+/*
+ * user level icmpv6 with control message "headers"
+ */
+typedef struct Icmp6hdr Icmp6hdr;
+struct Icmp6hdr {
+ uchar unused[8];
+ uchar laddr[IPaddrlen]; /* local address */
+ uchar raddr[IPaddrlen]; /* remote address */
+};
+.EE
.PP
-The
-.B ipselftab
-file summarises the addresses and routes that refer to the local host.
-It gives an address, the number of logical interfaces, and the interface type
-in the same form as the route type of
-.BR iproute .
+In this case (writing
+.L headers
+to the
+.I ctl
+file),
+neither
+.I listen
+nor
+.I accept
+is needed;
+otherwise,
+the usual sequence of
+.IR announce ,
+.IR listen ,
+.I accept
+must be executed before performing I/O on the corresponding
+.I data
+file.
+.
+.SS IL
+IL is a reliable point-to-point datagram protocol that runs over IPv4.
+Like TCP, IL delivers datagrams
+reliably and in order. Also like TCP, a connection is
+determined by the address and port numbers of the two ends.
+Like UDP, each read and write transfers a single datagram.
.PP
-The
-.B iprouter
-file is provided for use by a user-level application acting as an IP gateway.
-It is effective only when the kernel-level gateway is not enabled
-(see the
-.B iprouting
-interface control request below).
-Once opened, packets that are not addressed to a
-local address can be read from this device.
-The packet contents are preceded by a 16 byte binary header that
-gives the IPv6 address of the local interface that received the packet.
-.SS Bootstrap
+IL is efficient for LANs but doesn't have the
+congestion control features needed for use through
+the Internet.
+It is no longer necessary, except to communicate with old standalone
+.IR fs (4)
+file servers.
+Its use is now deprecated.
+.
+.SS GRE
+GRE is the encapsulation protocol used by PPTP.
+The kernel implements just enough of the protocol
+to multiplex it.
+Our implementation encapsulates in IPv4, per RFC 1702.
+.B Announce
+is not allowed in GRE, only
+.BR connect .
+Since GRE has no port numbers, the port number in the connect
+is actually the 16 bit
+.B eproto
+field in the GRE header.
.PP
-The read-only
-.B bootp
-file contains the results of the last BOOTP
-request transmitted on any interface (see
-.I "Physical and logical interfaces"
-below)
-as several lines of text,
-with two fields each.
-The first field names an entity and the second field gives its value in IPv4 address format.
-The current entities are:
-.IP
-.RS
-.TF ipaddr
-.TP
-.B auip
-Authentication server address
-.TP
-.B fsip
-File server address
-.TP
-.B gwip
-Address of an IP gateway out of this (sub)net.
-.TP
-.B ipaddr
-Local IP address
-.TP
-.B ipmask
-Subnet mask for the local IP address
-.RE
+Reads and writes transfer a
+GRE datagram starting at the GRE header.
+On write, the kernel fills in the
+.B eproto
+field with the port number specified
+in the connect message.
+.br
+.ne 3
+.
+.SS ESP
+ESP is the Encapsulating Security Payload (RFC 1827, obsoleted by RFC 4303)
+for IPsec (RFC 4301).
+We currently implement only tunnel mode, not transport mode.
+It is used to set up an encrypted tunnel between machines.
+Like GRE, ESP has no port numbers. Instead, the
+port number in the
+.B connect
+message is the SPI (Security Parameters Index).
+IP packets are written to and read from
+.BR data .
+The kernel encrypts any packets written to
+.BR data ,
+appends a MAC, and prefixes an ESP header before
+sending to the other end of the tunnel.
+Received packets are checked against their MACs,
+decrypted, and queued for reading from
+.BR data .
+In the following,
+.I secret
+is the hexadecimal encoding of a key,
+without a leading
+.LR 0x .
+The control messages are:
+.TF "\fLesp \fIalg secret\fR"
.PD
-.PP
-If any value is unknown (no reply to BOOTP, or value unspecified),
-the value will be zero, represented as
-.BR 0.0.0.0 .
-.SS Address resolution
-The
-.B arp
-file can be read and written.
-When read,
-it returns the contents of the current ARP cache as a sequence of lines,
-one per map entry, giving
-type, state, IP address and corresponding MAC address.
-Several textual commands can be written to it:
.TP
-.BI add " \f1[\f2 medium \f1]\f2 ip mac"
-Add a mapping from IP address
-.I ip
-to the given
-.I mac
-address (a sequence of bytes in hexadecimal)
-on the given
-.IR medium .
-It must support address resolution (eg, Ethernet).
-If the
-.I medium
-is not specified, find the one associated with a route to
-.I ip
-(which must be IPv4).
+.BI esp\ "alg secret"
+Encrypt with the algorithm,
+.IR alg ,
+using
+.I secret
+as the key.
+Possible algorithms are:
+.BR null ,
+.BR des_56_cbc ,
+.BR des3_cbc ,
+and eventually
+.BR aes_128_cbc ,
+and
+.BR aes_ctr .
.TP
-.B flush
-Clear the cache.
-.SS Logging
-.PP
-The
-.B log
-file provides protocol tracing and debugging data.
-While the file is held open, the system
-saves, in a small circular buffer, error messages logged by selected protocols.
-When read, it returns data not previously read,
-blocking until there is data to read.
-The following commands can be written to determine what is logged:
+.BI ah\ "alg secret"
+Use the hash algorithm,
+.IR alg ,
+with
+.I secret
+as the key for generating the MAC.
+Possible algorithms are:
+.BR null ,
+.BR hmac_sha1_96 ,
+.BR hmac_md5_96 ,
+and eventually
+.BR aes_xcbc_mac_96 .
.TP
-.BI set " proto ..."
-Enable logging of messages from each source
-.IR proto ,
-one or more of:
-.BR ppp ,
-.BR ip ,
-.BR fs ,
-.BR tcp ,
-.BR il ,
-.BR icmp ,
-.BR udp ,
-.BR compress ,
-.BR ilmsg ,
-.BR gre ,
-.BR tcpmsg ,
-.BR udpmsg ,
-.BR ipmsg
-and
-.BR esp .
+.B header
+Turn on header mode. Every buffer read from
+.B data
+starts with 4 unused bytes, and the first 4 bytes
+of every buffer written to
+.B data
+are ignored.
.TP
-.BI clear " proto ..."
-Disable logging of messages from the given sources.
-.SS Physical and logical interfaces
-The configuration of the physical and logical IP interfaces
-in a given instance of
-.B #I
-uses
-a virtual protocol
-.B ipifc
-within that instance,
-that adds, controls and removes
-IP interfaces.
-It is represented by the protocol directory
-.BR ipifc .
-Each connection corresponds to an interface to a physical or virtual medium on
-which IP packets can be sent and received.
-It has a set of associated values:
-minimum and maximum transfer unit,
-MAC address, and a set of logical IP interfaces.
-Each logical IP interface has local and remote addresses and an address mask.
+.B noheader
+Turn off header mode.
+.
+.SS "IP packet filter"
+The directory
+.B /net/ipmux
+looks like another protocol directory.
+It is a packet filter built on top of IP.
+Each numbered
+subdirectory represents a different filter.
+The connect messages written to the
+.I ctl
+file describe the filter. Packets matching the filter can be read on the
+.B data
+file. Packets written to the
+.B data
+file are routed to an interface and transmitted.
.PP
-Opening the
-.B clone
-file returns a file descriptor open on the
-.B ctl
-file for a new connection.
-A medium is then attached using a
-.B bind
-request;
-logical interfaces are associated by
-.B connect
-or
-.BR add ;
-they are removed by
-.BR remove ;
-and finally
-.B unbind
-detaches the medium from the connection.
-For certain types of media, the
-.B unbind
-is automatic when the connection itself is closed.
-With most media, including Ethernet,
-the
-.B ipifc
-connection files can be closed after configuration, and later
-reopened if need be to add or remove logical interfaces,
-or set other parameters.
-.PP
-The
-.B ctl
-file responds to the following text commands, including interface-specific variants
-of standard
-IP device
-requests:
+A filter is a semicolon-separated list of
+relations. Each relation describes a portion
+of a packet to match. The possible relations are:
+.TF "\fLdata[\fIn\fL:\fIm\fL]=\fIexpr\fR "
+.PD
.TP
-.BI bind " medium " "[ \f5\f2name\f5 [ \f2arg ...\f5 ]"
-Attach device
-.I medium
-to the interface, which must not already be bound to a device.
-The
-.I name
-and subsequent arguments are interpreted by the driver for the
-.IR medium .
-The device name associated with the interface is
-.IR name ,
-if given, or a generated name otherwise.
+.BI ver= n
+the IP version must be
+.IR n .
.TP
-.BR connect " \f2ip\f5 [\f2mask \f5[\f2remote \f5[\f2mtu \f5]]]"
-Remove all existing logical interfaces and create a new one as if by
-.B add
-(see below).
-The connection must be bound to a medium.
+.BI proto= n
+the IP protocol number must be
+.IR n .
.TP
-.BR add " \f2ip\f5 [\f2 mask \f5[\f2 remote \f5[\f2 mtu \f5] ] ]"
-Add a logical interface with local IP address
-.IR ip .
-The default for
-.I mask
-is the mask for
-.IR ip 's
-address class;
-for the
-.IR remote
-address,
-.IR ip 's
-network; and for
-.IR mtu ,
-the largest MTU allowed by the medium.
-The new interface is registered in the IP routing tables.
-.TP
-.B bootp
-Broadcast a BOOTP packet (using
-.BR udp ).
-If a valid response is received, set the interface's IP address and mask,
-and the IP stack's default gateway to the results obtained from BOOTP.
-The results are also available to applications by reading
-the
-.B bootp
-file above.
-Note that this mechanism is now deprecated in favour of
-.IR dhcpclient (2).
-.TP
-.BI remove " ip mask"
-Remove the logical interface determined by
-.I ip
-and
-.IR mask .
-.TP
-.BI iprouting\ [ "n" ]
-Control the use of IP routing on this
-.IR ip (3)
-instance.
-If
+.BI data[ n : m ]= expr
+bytes
.I n
-is missing or non-zero, allow use as a gateway,
-rerouting via one interface packets received on another.
-By default,
-or if
-.I n
-is zero, use as a gateway is not allowed: if a packet received
-is not addressed to any local interface, either pass it to
-a gateway application if active (see
-.B iprouter
-in
-.IR ip (3)),
-and otherwise drop the packet.
+through
+.I m
+following the IP header must match
+.IR expr .
.TP
-.BI mtu " n"
-.br
-Set the maximum transmit unit (MTU) on this interface to
+.BI iph[ n : m ]= expr
+bytes
.I n
-bytes, which must be valid for the medium.
+through
+.I m
+of the IP packet header must match
+.IR expr .
.TP
-.BI addmulti " multi"
-Add the multicast address
-.I multi
-to the interface.
+.BI ifc= expr
+the packet must have been received on an interface whose address
+matches
+.IR expr .
.TP
-.BI remmulti " multi"
-Remove the multicast address
-.I multi
-from the interface.
+.BI src= expr
+The source address in the packet must match
+.IR expr .
.TP
-.BI unbind
-Remove any association between
-the current medium (device) and the connection:
-remove all routes using this interface, detach the device,
-stop packet transport, and
-remove all logical interfaces.
-The connection is ready for re-use.
+.BI dst= expr
+The destination address in the packet must match
+.IR expr .
.PP
-The
-.B local
-file contains one line for each logical interface, of the form:
-.IP
-.IB local -> self ...
-.PP
-where
-.I local
-is the local address associated with the interface and each
-.I self
-is a broadcast or multicast address that can address that interface,
-including subnet addresses, if any.
-.PP
-The
-.B status
-file contains many fields:
-the first two give the device name and the value of the current MTU,
-followed by 7 fields per line for each logical interface:
-local address, address mask, remote address, packets in, packets out, input errors, and output errors.
-.PP
-The following sections describe the media drivers available.
-Each is separately configurable into a kernel.
-.SS Ethernet medium
-Ethernet devices as described in
-.IR ether (3)
-can be bound to an IP interface.
-The bind request has the form:
-.IP
-.BI "bind ether " device
-.PP
-The interface opens two conversations on the given Ethernet
-.IR device ,
-for instance
-.BR ether0 ,
-using an internal version of
-.BR dial ,
-with the addresses
-.IB device !0x800
-(IPv4)
-and
-.IB device !0x806
-(ARP).
-See
-.IR dial (2)
-for the interpretation of such addresses.
-The interface runs until a process does an explicit
-.BR unbind .
-Multicast settings made on the interface are propagated to the
-.IR device .
-.SS Point-to-point medium
-An asynchronous serial device as described in
-.IR eia (3)
-can be bound to an interface as a Point-to-Point protocol (PPP) device.
-The bind request has the form:
-.IP
-.BI "bind ppp " "serial ip remote mtu framing username secret"
-.PP
-All parameters except
-.I serial
-are optional.
-The character
-.RB ` - '
-can appear as a placeholder for any parameter.
-Except for authentication data, an attempt is made to negotiate
-suitable values for any missing parameter values, including network addresses.
-The parameters are interpreted as follows:
-.IP
-.RS
-.TF username
+.I Expr
+is of the form:
.TP
-.I serial
-Name of the device that will run PPP.
+.I \ value
.TP
-.I ip
-Local IP address for the interface.
+.IB \ value | value | ...
.TP
-.I remote
-IP address of the other end of the link.
+.IB \ value & mask
.TP
-.I mtu
-Initial MTU value for negotiation (default: 1450)
-.TP
-.I framing
-If
-.I framing
-is zero, do not provide asynch. framing (on by default).
-Unimplemented.
-.TP
-.I username
-Identification string used in PAP or CHAP authentication.
-.TP
-.I secret
-Secret used in authentication; with CHAP it never crosses the link.
-.PD
-.RE
+.IB \ value | value & mask
.PP
-If the name
-.I serial
-contains
-.RB ` ! '
-a connection will be opened using
-.B dial
-(see
-.IR dial (2)).
-Otherwise the name will be opened as-is;
-usually it is the name of a serial device
-(eg,
-.BR "#t/eia0" ).
-In the latter case, a companion
-.B ctl
-file will also be opened if possible, to set serial characteristics for PPP
-(flow control, 64kbyte queue size, nonblocking writes).
-An attempt is made to start the PPP link immediately.
-The write of the
-.B bind
-control message returns with an error if the link cannot be started,
-or if negotiation fails.
-The PPP link is automatically unbound if the line hangs up (eg, modem drops carrier),
-or an unrecoverable error occurs when reading or writing the connection.
-.PP
-The PPP implementation can use either PAP and CHAP authentication,
-as negotiated, provided an appropriate
-.I username
+If a mask is given, the relevant field is first ANDed with
+the mask. The result is compared against the value or list
+of values for a match. In the case of
+.BR ifc ,
+.BR dst ,
and
-.I secret
-is given in the
-.B bind
-request.
-It does not yet support the Microsoft authentication scheme.
-.SS Packet medium
-The packet medium allows an application to be source and sink
-for IP packets.
-It is bound to an interface by the simple request:
-.IP
-.B "bind pkt"
-.PP
-All other interface parameters including its IP address are
-set using the standard
-.I ipifc
-requests described above.
-Once that has been done, the application reads the
-.B data
-file of the interface to receive packets addressed to the interface,
-and it writes to the file to inject packets into the IP network.
-The interface is automatically unbound when all interface files are closed.
-.SS Hosted interfaces
-Native Inferno and Plan 9 have related IP implementations.
-Plan 9
-.I emu
-therefore simply imports Plan 9's
-.BR /net ,
-and in the absence of version-specific differences, what is described
-above still applies.
-.PP
-On all other hosted platforms,
-the IP device gives applications
-within
-.IR emu (1)
-a portable interface to TCP/IP and UDP/IP, even through it
-is ultimately using the host system's own TCP/IP and UDP/IP implementations
-(usually but not always socket based).
-The interface remains the same: for instance by
-.B /net/tcp
+.B src
+the value is a dot-formatted IP address and the mask is a dot-formatted
+IP mask. In the case of
+.BR data ,
+.B iph
and
-.BR /net/udp ,
-but is currently more limited in the set of services and control requests.
-Both IPv4 and IPv6 address syntax may be used, but the IPv6 form must
-still map to the IPv4 address space if the IPv6 support is not configured into
-.IR emu .
-Only TCP and UDP are generally available, and a limited interface to ARP on some platforms (see below).
-The set of TCP/UDP control requests is limited to:
-.BR connect ,
-.BR announce ,
-.BR bind ,
-.BR ttl ,
-.BR tos ,
-.BR ignoreadvice ,
-.BR headers4 ,
-.BR oldheaders ,
-.BR headers ,
-.BR hangup
-and
-.BR keepalive .
+.BR proto ,
+both value and mask are strings of 2 hexadecimal digits representing
+8-bit values.
.PP
-The write-only
-.B arp
-file is implemented only on some Unix systems, and
-is intended to allow the implementation of
-the BOOTP protocol
-using Inferno, on hosted systems.
-It accepts a single textual control request:
-.TP
-.BI add " ip ether"
-Add a new ARP map entry, or replace an existing one, for IP address
-.IR ip ,
-associating it with the given
-.I ether
-MAC address.
-The
-.I ip
-address is expressed in the usual dotted address notation;
-.I ether
-is a 12 digit hexadecimal number.
+A packet is delivered to only one filter.
+The filters are merged into a single comparison tree.
+If two filters match the same packet, the following
+rules apply in order (here '>' means is preferred to):
+.IP 1)
+protocol > data > source > destination > interface
+.IP 2)
+lower data offsets > higher data offsets
+.IP 3)
+longer matches > shorter matches
+.IP 4)
+older > younger
.PP
-An error results if the host system does not allow the ARP map
-to be set, or the current user lacks the privileges to set it.
-.SH SOURCE
-.B /emu/port/devip.c
+So far this has just been used to implement a version of
+OSPF in Inferno
+and 6to4 tunnelling.
.br
-.B /os/ip/devip.c
+.ne 5
+.
+.SS Statistics
+The
+.B stats
+files are read only and contain statistics useful to network monitoring.
.br
-.BI /os/ip/ proto .c
+.ne 12
+.PP
+Reading
+.B /net/ipifc/stats
+returns a list of 19 tagged and newline-separated fields representing:
+.EX
+.ft 1
+.2C
+.in +0.25i
+forwarding status (0 and 2 mean forwarding off,
+ 1 means on)
+default TTL
+input packets
+input header errors
+input address errors
+packets forwarded
+input packets for unknown protocols
+input packets discarded
+input packets delivered to higher level protocols
+output packets
+output packets discarded
+output packets with no route
+timed out fragments in reassembly queue
+requested reassemblies
+successful reassemblies
+failed reassemblies
+successful fragmentations
+unsuccessful fragmentations
+fragments created
+.in -0.25i
+.1C
+.ft
+.EE
.br
-.B /os/ip/ipifc.c
-.br
-.br
-.B /os/ip/*medium.c
+.ne 16
+.PP
+Reading
+.B /net/icmp/stats
+returns a list of 26 tagged and newline-separated fields representing:
+.EX
+.ft 1
+.2C
+.in +0.25i
+messages received
+bad received messages
+unreachables received
+time exceededs received
+input parameter problems received
+source quenches received
+redirects received
+echo requests received
+echo replies received
+timestamps received
+timestamp replies received
+address mask requests received
+address mask replies received
+messages sent
+transmission errors
+unreachables sent
+time exceededs sent
+input parameter problems sent
+source quenches sent
+redirects sent
+echo requests sent
+echo replies sent
+timestamps sent
+timestamp replies sent
+address mask requests sent
+address mask replies sent
+.in -0.25i
+.1C
+.EE
+.PP
+Reading
+.B /net/tcp/stats
+returns a list of 11 tagged and newline-separated fields representing:
+.EX
+.ft 1
+.2C
+.in +0.25i
+maximum number of connections
+total outgoing calls
+total incoming calls
+number of established connections to be reset
+number of currently established connections
+segments received
+segments sent
+segments retransmitted
+retransmit timeouts
+bad received segments
+transmission failures
+.in -0.25i
+.1C
+.EE
+.PP
+Reading
+.B /net/udp/stats
+returns a list of 4 tagged and newline-separated fields representing:
+.EX
+.ft 1
+.2C
+.in +0.25i
+datagrams received
+datagrams received for bad ports
+malformed datagrams received
+datagrams sent
+.in -0.25i
+.1C
+.EE
+.PP
+Reading
+.B /net/il/stats
+returns a list of 6 tagged and newline-separated fields representing:
+.EX
+.ft 1
+.2C
+.in +0.25i
+checksum errors
+header length errors
+out of order messages
+retransmitted messages
+duplicate messages
+duplicate bytes
+.in -0.25i
+.1C
+.EE
+.PP
+Reading
+.B /net/gre/stats
+returns a single tagged number representing:
+.EX
+.ft 1
+.in +0.25i
+header length errors
+.in -0.25i
+.EE
.SH "SEE ALSO"
-.IR dial (2)
-.\" joinmulti and leavemulti are unimplemented
-.\" many media are only partly implemented
+.IR dial (2),
+.IR ip (2),
+.IR bridge (3),
+.\" .IR ike (4),
+.IR ndb (6),
+.IR listen (8)
+.br
+.PD 0
+.TF "\fL/lib/rfc/rfc2822"
+.TP
+.B /lib/rfc/rfc2460
+IPv6
+.TP
+.B /lib/rfc/rfc4291
+IPv6 address architecture
+.TP
+.B /lib/rfc/rfc4443
+ICMPv6
+.SH SOURCE
+.B /sys/src/9/ip
+.SH BUGS
+.I Ipmux
+has not been heavily used and should be considered experimental.
+It may disappear in favor of a more traditional packet filter in the future.
--- /dev/null
+++ b/man/3/ip.original
@@ -1,0 +1,927 @@
+.TH IP 3
+.SH NAME
+ip \- network protocols over IP
+.SH SYNOPSIS
+.nf
+.B bind -a #I\f1[\f5\f2ifn\f1]\f5 /net
+
+.B /net/arp
+.B /net/bootp
+.B /net/iproute
+.B /net/ipselftab
+.B /net/iprouter
+.B /net/log
+
+.B /net/ipifc/clone
+.B /net/ipifc/stats
+.BI /net/ipifc/ n
+.BI /net/ipifc/ n /data
+.BI /net/ipifc/ n /ctl
+.BI /net/ipifc/ n /local
+.BI /net/ipifc/ n /status
+
+.BI /net/ proto /clone
+.BI /net/ proto /stats
+.BI /net/ proto / n
+.BI /net/ proto / n /ctl
+.BI /net/ proto / n /data
+.BI /net/ proto / n /err
+.BI /net/ proto / n /local
+.BI /net/ proto / n /remote
+.BI /net/ proto / n /status
+.BI /net/ proto / n /listen
+\&...
+.fi
+.SH DESCRIPTION
+The IP device serves a directory representing a self-contained
+collection of IP interfaces.
+There may be several instances, identified by the decimal interface number
+.IR ifn ,
+that follows the
+.B #I
+device name;
+.B #I0
+is assumed by default.
+Each instance
+has a disjoint collection of IP interfaces, routes and address resolution maps.
+A physical or virtual device, or
+.IR medium ,
+that produces IP packets is associated
+with a logical IP network using the mechanisms described under
+.I "Physical and logical interfaces"
+below.
+Commonly all IP media on a host are assigned to a single
+instance of
+.BR #I ,
+which is conventionally bound to
+.BR /net ,
+but other configurations are possible: interfaces might be assigned
+to different device instances forming separate
+logical IP networks
+to partition networks in firewall or
+gateway applications.
+.PP
+Hosted Inferno provides a subset of the interface described here that gives
+access to the TCP/IP and UDP/IP of the host system's own IP subsystem.
+See
+.IR "Hosted interfaces"
+below for a summary of the differences.
+.SS Protocols
+Within each instance,
+the IP device provides
+an interface to each IP protocol configured into the system, such as TCP/IP or UDP/IP.
+.PP
+Each of the protocols is served by the IP device, which represents a
+connection by a set of device files.
+The top level directory,
+.I proto
+in the
+.SM SYNOPSIS
+above,
+is named after a protocol (eg,
+.BR tcp ,
+.BR il ,
+.BR udp )
+and contains a
+.B clone
+file, a
+.B stats
+file,
+and subdirectories numbered from zero to the number of connections
+configured for this protocol.
+.PP
+The read-only
+.B stats
+file contains protocol-specific statistics as one or more lines of text.
+There is no particular format, but the values are often a superset
+of those required by the SNMP MIB.
+.PP
+Opening the
+.B clone
+file reserves a connection, represented by
+one of the numbered subdirectories. The resulting file descriptor
+will be open on the control file,
+.BR ctl ,
+of the newly allocated connection.
+Reading the
+.B ctl
+file returns a text
+string representing the number of the
+connection.
+Connections may be used either to listen for incoming calls
+or to initiate calls to other machines.
+.PP
+A connection is controlled by writing text strings to the associated
+.B ctl
+file.
+After a connection has been established data may be read from
+and written to the data file.
+.PP
+Before sending data, remote and local addresses must be set for the connection.
+For outgoing calls the local port number will be allocated randomly if none is set.
+Addresses are set by writing control messages to the
+.B ctl
+file of the connection.
+The connection is not established until the data file is opened.
+There are two models depending on the nature of the protocol.
+For connection-oriented protocols, the process will block on open
+until the remote host has acknowledged the connection,
+either accepting it, causing a successful return from open,
+or rejecting it, causing open to return an appropriate error.
+For connectionless protocols, the open always succeeds;
+the `connect' request sets local parameters for the source and destination fields
+for use by subsequent read and write requests.
+.PP
+The following control messages are provided by this interface
+to all protocols.
+A particular protocol can provide additional commands, or
+change the interpretation or even syntax of those below,
+as described in the manual page for that protocol.
+The description below shows
+the standard commands with the default argument syntax and interpretation:
+.TP
+.BI connect\ ipaddress ! port "[!r]\ [\f2lport\f5]"
+Set the remote IP address and port number for the connection.
+If the
+.B r
+flag
+is supplied and the optional local port
+.I lport
+has not been specified the system will allocate
+a restricted port number (between 600 and 1024) for the connection to allow communication
+with Unix machines'
+.B login
+and
+.B exec
+services.
+.TP
+.BI "announce\ [" ipaddress !] port
+Set the local port
+number to
+.I port
+and accept calls to that port.
+.I Port
+is a decimal port number or
+.LR * .
+If
+.I port
+is zero, assign a port number
+(the one assigned can be read from the
+.B local
+address file).
+If
+.I port
+is
+.LR * ,
+accept
+calls for any port that no process has explicitly announced.
+If the optional
+.I ipaddress
+is given, set the local IP address for the connection
+to that address, and accept only those incoming calls to
+.I port
+that are addressed to
+.IR ipaddress .
+.B Announce
+fails if the connection is already announced or connected.
+.TP
+.BI bind\ port
+.I Port
+is a decimal port number or
+.LR * .
+Set the local port number to
+.IR port .
+This request exists to support emulation of
+BSD sockets and is otherwise neither needed nor used in Inferno.
+.TP
+.BI tos " \f1[\f2 n \f1]\f2"
+Set the type-of-service value in outgoing packets to
+.I n
+(default: 0).
+.TP
+.BI ttl " \f1[\f2 n \f1]\f2"
+Set the time-to-live (TTL) value in packets transmitted on this conversation
+to
+.I n
+(default: 255).
+.PP
+Port numbers must be in the range 1 to 32767.
+.PP
+Several read-only files report the status of a
+connection.
+The
+.B remote
+and
+.B local
+files contain the IP address and port number for the remote and local side of the
+connection.
+The
+.B status
+file contains protocol-dependent information to help debug network connections.
+The first word on the first line gives the status of the
+connection.
+.PP
+Having announced, a process may accept incoming connections by calling
+.B open
+on the
+.B listen
+file.
+The
+.B open
+will block until a new connection request arrives;
+it will then
+return an open file descriptor that points to the control file of the
+newly accepted connection.
+Repeating this procedure will accept all calls for the
+given protocol.
+.PP
+In general it should not be necessary to use the file system interface to the
+networks.
+The
+.BR dial ,
+.BR announce ,
+and
+.BR listen
+functions described in
+.IR dial (2)
+perform the necessary I/O to establish and
+manipulate network connections.
+.SS TCP protocol
+The TCP protocol is the standard Internet
+protocol for reliable stream communication; it does not preserve
+read/write
+boundaries.
+.PP
+A connection is controlled by writing text strings to the associated
+.B ctl
+file.
+After a connection has been established data may be read from
+and written to the data file.
+The TCP protocol provides a stream connection that does not preserve
+read/write
+boundaries.
+.PP
+For outgoing calls the local port number will be allocated randomly if none is set.
+Addresses are set by writing control messages to the
+.B ctl
+file of the connection.
+The connection is not established until the data file is opened.
+For TCP the
+process will block until the remote host has acknowledged the connection.
+.PP
+As well as the standard control messages above,
+TCP accepts the following:
+.TP
+.BI hangup
+Send a TCP reset (RST) to the remote side and end the conversation,
+without waiting for untransmitted data to be acknowledged,
+unlike a normal close of the device.
+.TP
+.BI keepalive\ [ "n" ]
+Enable `keep alive'
+mode:
+if no traffic crosses the link within a given period, send a
+packet to check that the remote party is still there, and remind
+it that the local connection is still live.
+The optional value
+.I n
+gives the keep-alive time in milliseconds (default: 120000).
+.PP
+The
+.B status
+file has many lines, each containing a labelled number, giving the values
+of parameters and statistics such as:
+maximum allowed connections, outgoing calls, incoming calls, established but later reset,
+active calls, input segments, output segments, retransmitted segments, retransmission timeouts,
+input errors, transmitted reset.
+.SS UDP protocol
+.PP
+UDP provides the standard Internet
+protocol for unreliable datagram
+communication.
+.PP
+UDP opens always succeed.
+Before sending data, remote and local addresses must be set for the connection.
+Alternatively, the following special control requests can be used:
+.TP
+.B headers
+Set the connection to use an address header with IPv6 addressing
+on reads and writes of the data file,
+allowing a single connection to exchange datagrams with
+many different destination addresses and ports.
+The 52 byte binary header appears before the data
+read or written.
+It contains: remote IP address, local IP address, interface IP address, remote port, and local port.
+The IP addresses are 16 bytes each in IPv6 format, and
+the port addresses are 2 bytes each, all written in network (big-endian) order.
+On reads, the header gives the values from the incoming datagram,
+except that if the remote used a multicast destination address, the IP address
+of the receiving interface is substituted.
+On writes, the header provides the destination for the resulting datagram,
+and if the local IP address corresponds to a valid local unicast interface,
+that address is used, otherwise the IP address of the transmitting interface
+is substituted.
+.TP
+.B headers4
+Set the connection to use an address header with IPv4 addresses
+on reads and writes of the data file,
+allowing a single connection to exchange datagrams with
+many different destination addresses and ports.
+The 12 byte binary header appears before the data
+read or written.
+It contains: remote IP address, local IP address, remote port, and local port.
+The IP addresses are 4 bytes each, and
+the port addresses are 2 bytes each, all written in network (big-endian) order.
+On reads, the header gives the values from the incoming datagram.
+On writes, the header provides the destination for the resulting datagram.
+This mode is obsolete and destined for oblivion.
+.PP
+A read of less than
+the size of the datagram will cause the entire datagram to be consumed.
+Each write to the data file will send a single datagram on the network.
+.PP
+In replies, in connection-oriented mode, if the remote address
+has not been set, the first arriving packet sets the following
+based on the source of the incoming datagram:
+the remote address and port for the conversation,
+and the local address is set to the destination address in the
+datagram unless that is a multicast address, in which case the address
+of the receiving interface is used.
+.PP
+If a conversation is in
+.B headers
+mode, only the local port is relevant.
+.PP
+Connection-oriented UDP is hung up if an ICMP error (eg, host or port unreachable,
+or time exceeded) arrives with matching port.
+.PP
+The
+.I udp
+.B status
+file contains four lines, each containing a labelled number counting an event:
+input datagrams, datagrams on unannounced ports, datagrams with wrong checksum, and output datagrams.
+.SS IL Protocol
+IL provides a reliable point-to-point datagram service for communication between Plan 9 and
+native Inferno machines.
+Each read and write transfers a single datagram, as for UDP.
+The datagrams are delivered reliably and in order.
+Conversations are addressed and established as for TCP.
+.SS Routing
+.PP
+The
+.B iproute
+file can be read and written.
+When read, it returns the contents of the IP routing tables,
+one line per entry,
+with six fields giving the
+destination host or network address, address mask,
+gateway address, route type, tag (see below), and the number of the
+.B ipifc
+interface owning the route
+(or
+.RB ` - '
+if none).
+The route type is up to four characters:
+.B 4
+or
+.B 6
+(IPv4 or IPv6 route);
+.B i
+(route is interface);
+one of
+.B u
+(unicast),
+.B b
+(broadcast),
+or
+.B m
+(multicast);
+and lastly
+.B p
+if the route is point-to-point.
+.PP
+Commands can also be written to control the routing:
+.TP
+.BI add " ip mask gw \f1[\f2 tag \f1]\f2"
+Add a route via the gateway identified by IP address
+.I gw
+to the address specified by
+.I ip
+and subnet mask
+.IR mask .
+Tag the resulting table entry with the
+.I tag
+provided, or the current
+.I tag
+(see
+.B tag
+below),
+or the tag
+.BR none .
+.TP
+.BI flush " \f1[\f2 tag \f1]\f2"
+Remove all routes with the given
+.I tag
+that do not correspond to a local interface.
+If
+.I tag
+is not given, flush all routes.
+.TP
+.BI remove " ip mask"
+Remove routes to the given address.
+.TP
+.BI tag " tag"
+Tag the routes generated by writes on the current file descriptor with
+the given
+.IR tag
+of up to 4 characters.
+The default is
+.BR none ,
+set when
+.B iproute
+is opened.
+.PP
+The
+.B ipselftab
+file summarises the addresses and routes that refer to the local host.
+It gives an address, the number of logical interfaces, and the interface type
+in the same form as the route type of
+.BR iproute .
+.PP
+The
+.B iprouter
+file is provided for use by a user-level application acting as an IP gateway.
+It is effective only when the kernel-level gateway is not enabled
+(see the
+.B iprouting
+interface control request below).
+Once opened, packets that are not addressed to a
+local address can be read from this device.
+The packet contents are preceded by a 16 byte binary header that
+gives the IPv6 address of the local interface that received the packet.
+.SS Bootstrap
+.PP
+The read-only
+.B bootp
+file contains the results of the last BOOTP
+request transmitted on any interface (see
+.I "Physical and logical interfaces"
+below)
+as several lines of text,
+with two fields each.
+The first field names an entity and the second field gives its value in IPv4 address format.
+The current entities are:
+.IP
+.RS
+.TF ipaddr
+.TP
+.B auip
+Authentication server address
+.TP
+.B fsip
+File server address
+.TP
+.B gwip
+Address of an IP gateway out of this (sub)net.
+.TP
+.B ipaddr
+Local IP address
+.TP
+.B ipmask
+Subnet mask for the local IP address
+.RE
+.PD
+.PP
+If any value is unknown (no reply to BOOTP, or value unspecified),
+the value will be zero, represented as
+.BR 0.0.0.0 .
+.SS Address resolution
+The
+.B arp
+file can be read and written.
+When read,
+it returns the contents of the current ARP cache as a sequence of lines,
+one per map entry, giving
+type, state, IP address and corresponding MAC address.
+Several textual commands can be written to it:
+.TP
+.BI add " \f1[\f2 medium \f1]\f2 ip mac"
+Add a mapping from IP address
+.I ip
+to the given
+.I mac
+address (a sequence of bytes in hexadecimal)
+on the given
+.IR medium .
+The medium must support address resolution (eg, Ethernet).
+If the
+.I medium
+is not specified, find the one associated with a route to
+.I ip
+(which must be IPv4).
+.TP
+.B flush
+Clear the cache.
+.SS Logging
+.PP
+The
+.B log
+file provides protocol tracing and debugging data.
+While the file is held open, the system
+saves, in a small circular buffer, error messages logged by selected protocols.
+When read, it returns data not previously read,
+blocking until there is data to read.
+The following commands can be written to determine what is logged:
+.TP
+.BI set " proto ..."
+Enable logging of messages from each source
+.IR proto ,
+one or more of:
+.BR ppp ,
+.BR ip ,
+.BR fs ,
+.BR tcp ,
+.BR il ,
+.BR icmp ,
+.BR udp ,
+.BR compress ,
+.BR ilmsg ,
+.BR gre ,
+.BR tcpmsg ,
+.BR udpmsg ,
+.BR ipmsg
+and
+.BR esp .
+.TP
+.BI clear " proto ..."
+Disable logging of messages from the given sources.
+.SS Physical and logical interfaces
+The configuration of the physical and logical IP interfaces
+in a given instance of
+.B #I
+uses
+a virtual protocol
+.B ipifc
+within that instance,
+that adds, controls and removes
+IP interfaces.
+It is represented by the protocol directory
+.BR ipifc .
+Each connection corresponds to an interface to a physical or virtual medium on
+which IP packets can be sent and received.
+It has a set of associated values:
+minimum and maximum transfer unit,
+MAC address, and a set of logical IP interfaces.
+Each logical IP interface has local and remote addresses and an address mask.
+.PP
+Opening the
+.B clone
+file returns a file descriptor open on the
+.B ctl
+file for a new connection.
+A medium is then attached using a
+.B bind
+request;
+logical interfaces are associated by
+.B connect
+or
+.BR add ;
+they are removed by
+.BR remove ;
+and finally
+.B unbind
+detaches the medium from the connection.
+For certain types of media, the
+.B unbind
+is automatic when the connection itself is closed.
+With most media, including Ethernet,
+the
+.B ipifc
+connection files can be closed after configuration, and later
+reopened if need be to add or remove logical interfaces,
+or set other parameters.
+.PP
+The
+.B ctl
+file responds to the following text commands, including interface-specific variants
+of standard
+IP device
+requests:
+.TP
+.BI bind " medium " "[ \f5\f2name\f5 [ \f2arg ...\f5 ] ]"
+Attach device
+.I medium
+to the interface, which must not already be bound to a device.
+The
+.I name
+and subsequent arguments are interpreted by the driver for the
+.IR medium .
+The device name associated with the interface is
+.IR name ,
+if given, or a generated name otherwise.
+.TP
+.BR connect " \f2ip\f5 [\f2mask \f5[\f2remote \f5[\f2mtu \f5]]]"
+Remove all existing logical interfaces and create a new one as if by
+.B add
+(see below).
+The connection must be bound to a medium.
+.TP
+.BR add " \f2ip\f5 [\f2 mask \f5[\f2 remote \f5[\f2 mtu \f5] ] ]"
+Add a logical interface with local IP address
+.IR ip .
+The default for
+.I mask
+is the mask for
+.IR ip 's
+address class;
+for the
+.IR remote
+address,
+.IR ip 's
+network; and for
+.IR mtu ,
+the largest MTU allowed by the medium.
+The new interface is registered in the IP routing tables.
+.TP
+.B bootp
+Broadcast a BOOTP packet (using
+.BR udp ).
+If a valid response is received, set the interface's IP address and mask,
+and the IP stack's default gateway to the results obtained from BOOTP.
+The results are also available to applications by reading
+the
+.B bootp
+file above.
+Note that this mechanism is now deprecated in favour of
+.IR dhcpclient (2).
+.TP
+.BI remove " ip mask"
+Remove the logical interface determined by
+.I ip
+and
+.IR mask .
+.TP
+.BI iprouting\ [ "n" ]
+Control the use of IP routing on this
+.IR ip (3)
+instance.
+If
+.I n
+is missing or non-zero, allow use as a gateway,
+rerouting via one interface packets received on another.
+By default,
+or if
+.I n
+is zero, use as a gateway is not allowed: if a packet received
+is not addressed to any local interface, either pass it to
+a gateway application if active (see
+.B iprouter
+in
+.IR ip (3)),
+or otherwise drop the packet.
+.TP
+.BI mtu " n"
+.br
+Set the maximum transmit unit (MTU) on this interface to
+.I n
+bytes, which must be valid for the medium.
+.TP
+.BI addmulti " multi"
+Add the multicast address
+.I multi
+to the interface.
+.TP
+.BI remmulti " multi"
+Remove the multicast address
+.I multi
+from the interface.
+.TP
+.BI unbind
+Remove any association between
+the current medium (device) and the connection:
+remove all routes using this interface, detach the device,
+stop packet transport, and
+remove all logical interfaces.
+The connection is ready for re-use.
+.PP
+The
+.B local
+file contains one line for each logical interface, of the form:
+.IP
+.IB local -> self ...
+.PP
+where
+.I local
+is the local address associated with the interface and each
+.I self
+is a broadcast or multicast address that can address that interface,
+including subnet addresses, if any.
+.PP
+The
+.B status
+file contains many fields:
+the first two give the device name and the value of the current MTU,
+followed by 7 fields per line for each logical interface:
+local address, address mask, remote address, packets in, packets out, input errors, and output errors.
+.PP
+The following sections describe the media drivers available.
+Each is separately configurable into a kernel.
+.SS Ethernet medium
+Ethernet devices as described in
+.IR ether (3)
+can be bound to an IP interface.
+The bind request has the form:
+.IP
+.BI "bind ether " device
+.PP
+The interface opens two conversations on the given Ethernet
+.IR device ,
+for instance
+.BR ether0 ,
+using an internal version of
+.BR dial ,
+with the addresses
+.IB device !0x800
+(IPv4)
+and
+.IB device !0x806
+(ARP).
+See
+.IR dial (2)
+for the interpretation of such addresses.
+The interface runs until a process does an explicit
+.BR unbind .
+Multicast settings made on the interface are propagated to the
+.IR device .
+.SS Point-to-point medium
+An asynchronous serial device as described in
+.IR eia (3)
+can be bound to an interface as a Point-to-Point protocol (PPP) device.
+The bind request has the form:
+.IP
+.BI "bind ppp " "serial ip remote mtu framing username secret"
+.PP
+All parameters except
+.I serial
+are optional.
+The character
+.RB ` - '
+can appear as a placeholder for any parameter.
+Except for authentication data, an attempt is made to negotiate
+suitable values for any missing parameter values, including network addresses.
+The parameters are interpreted as follows:
+.IP
+.RS
+.TF username
+.TP
+.I serial
+Name of the device that will run PPP.
+.TP
+.I ip
+Local IP address for the interface.
+.TP
+.I remote
+IP address of the other end of the link.
+.TP
+.I mtu
+Initial MTU value for negotiation (default: 1450).
+.TP
+.I framing
+If
+.I framing
+is zero, do not provide asynchronous framing (on by default).
+Unimplemented.
+.TP
+.I username
+Identification string used in PAP or CHAP authentication.
+.TP
+.I secret
+Secret used in authentication; with CHAP it never crosses the link.
+.PD
+.RE
+.PP
+If the name
+.I serial
+contains
+.RB ` ! '
+a connection will be opened using
+.B dial
+(see
+.IR dial (2)).
+Otherwise the name will be opened as-is;
+usually it is the name of a serial device
+(eg,
+.BR "#t/eia0" ).
+In the latter case, a companion
+.B ctl
+file will also be opened if possible, to set serial characteristics for PPP
+(flow control, 64kbyte queue size, nonblocking writes).
+An attempt is made to start the PPP link immediately.
+The write of the
+.B bind
+control message returns with an error if the link cannot be started,
+or if negotiation fails.
+The PPP link is automatically unbound if the line hangs up (eg, modem drops carrier),
+or an unrecoverable error occurs when reading or writing the connection.
+.PP
+The PPP implementation can use either PAP or CHAP authentication,
+as negotiated, provided an appropriate
+.I username
+and
+.I secret
+are given in the
+.B bind
+request.
+It does not yet support the Microsoft authentication scheme.
+.SS Packet medium
+The packet medium allows an application to be source and sink
+for IP packets.
+It is bound to an interface by the simple request:
+.IP
+.B "bind pkt"
+.PP
+All other interface parameters including its IP address are
+set using the standard
+.I ipifc
+requests described above.
+Once that has been done, the application reads the
+.B data
+file of the interface to receive packets addressed to the interface,
+and it writes to the file to inject packets into the IP network.
+The interface is automatically unbound when all interface files are closed.
+.SS Hosted interfaces
+Native Inferno and Plan 9 have related IP implementations.
+Plan 9
+.I emu
+therefore simply imports Plan 9's
+.BR /net ,
+and in the absence of version-specific differences, what is described
+above still applies.
+.PP
+On all other hosted platforms,
+the IP device gives applications
+within
+.IR emu (1)
+a portable interface to TCP/IP and UDP/IP, even though it
+is ultimately using the host system's own TCP/IP and UDP/IP implementations
+(usually but not always socket based).
+The interface remains the same: for instance by
+.B /net/tcp
+and
+.BR /net/udp ,
+but is currently more limited in the set of services and control requests.
+Both IPv4 and IPv6 address syntax may be used, but the IPv6 form must
+still map to the IPv4 address space if the IPv6 support is not configured into
+.IR emu .
+Only TCP and UDP are generally available, and a limited interface to ARP on some platforms (see below).
+The set of TCP/UDP control requests is limited to:
+.BR connect ,
+.BR announce ,
+.BR bind ,
+.BR ttl ,
+.BR tos ,
+.BR ignoreadvice ,
+.BR headers4 ,
+.BR oldheaders ,
+.BR headers ,
+.BR hangup
+and
+.BR keepalive .
+.PP
+The write-only
+.B arp
+file is implemented only on some Unix systems, and
+is intended to allow the implementation of
+the BOOTP protocol
+using Inferno, on hosted systems.
+It accepts a single textual control request:
+.TP
+.BI add " ip ether"
+Add a new ARP map entry, or replace an existing one, for IP address
+.IR ip ,
+associating it with the given
+.I ether
+MAC address.
+The
+.I ip
+address is expressed in the usual dotted address notation;
+.I ether
+is a 12 digit hexadecimal number.
+.PP
+An error results if the host system does not allow the ARP map
+to be set, or the current user lacks the privileges to set it.
+.SH SOURCE
+.B /emu/port/devip.c
+.br
+.B /os/ip/devip.c
+.br
+.BI /os/ip/ proto .c
+.br
+.B /os/ip/ipifc.c
+.br
+.br
+.B /os/ip/*medium.c
+.SH "SEE ALSO"
+.IR dial (2)
+.\" joinmulti and leavemulti are unimplemented
+.\" many media are only partly implemented
--- a/os/ip.original/ipifc.c
+++ b/os/ip.original/ipifc.c
@@ -1555,7 +1555,7 @@
/*
* remove a multicast address from an interface, called with c locked
*/
-void
+extern void
ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
{
Ipmulti *multi, **l;
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -58,7 +58,7 @@
f->arp->f = f;
f->arp->rxmt = nil;
f->arp->dropf = f->arp->dropl = nil;
- kproc("rxmitproc", rxmitproc, f->arp);
+ kproc("rxmitproc", rxmitproc, f->arp, 0);
}
static void
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -183,7 +183,7 @@
case Qtopdir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%ud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -206,7 +206,7 @@
case Qprotodir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%ud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -306,7 +306,7 @@
}
static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, int nname)
+ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
{
IPaux *a = c->aux;
Walkqid* w;
@@ -318,8 +318,8 @@
}
-static int
-ipstat(Chan* c, uchar* db, int n)
+static s32
+ipstat(Chan* c, uchar* db, s32 n)
{
return devstat(c, db, n, nil, 0, ipgen);
}
@@ -340,7 +340,7 @@
};
static Chan*
-ipopen(Chan* c, int omode)
+ipopen(Chan* c, u32 omode)
{
Conv *cv, *nc;
Proto *p;
@@ -484,13 +484,6 @@
return c;
}
-static Chan*
-ipcreate(Chan*, char*, int, ulong)
-{
- error(Eperm);
- return 0;
-}
-
static void
ipremove(Chan*)
{
@@ -497,8 +490,8 @@
error(Eperm);
}
-static int
-ipwstat(Chan *c, uchar *dp, int n)
+static s32
+ipwstat(Chan *c, uchar *dp, s32 n)
{
Dir *dir;
Conv *cv;
@@ -613,8 +606,8 @@
Statelen= 32*1024,
};
-static long
-ipread(Chan *ch, void *a, long n, vlong off)
+static s32
+ipread(Chan *ch, void *a, s32 n, s64 off)
{
Conv *c;
Proto *x;
@@ -636,7 +629,7 @@
case Qarp:
return arpread(f->arp, a, offset, n);
case Qbootp:
- return bootpread(a, offset, n);
+ return 0 /*TODO bootpread(a, offset, n)*/;
case Qndb:
return readstr(offset, a, n, f->ndb);
case Qiproute:
@@ -705,7 +698,7 @@
}
static Block*
-ipbread(Chan* ch, long n, ulong offset)
+ipbread(Chan* ch, s32 n, u32 offset)
{
Conv *c;
Proto *x;
@@ -1077,8 +1070,8 @@
c->ttl = atoi(cb->f[1]);
}
-static long
-ipwrite(Chan* ch, void *v, long n, vlong off)
+static s32
+ipwrite(Chan* ch, void *v, s32 n, s64 off)
{
Conv *c;
Proto *x;
@@ -1177,8 +1170,8 @@
return n;
}
-static long
-ipbwrite(Chan* ch, Block* bp, ulong offset)
+static s32
+ipbwrite(Chan* ch, Block* bp, u32 offset)
{
Conv *c;
Proto *x;
@@ -1210,7 +1203,7 @@
ipwalk,
ipstat,
ipopen,
- ipcreate,
+ devcreate,
ipclose,
ipread,
ipbread,
@@ -1449,7 +1442,7 @@
ulong
scalednconv(void)
{
- if(cpuserver && conf.npage*BY2PG >= 128*MB)
+ if(conf.npage*BY2PG >= 128*MB)
return Nchans*4;
return Nchans;
}
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -205,9 +205,9 @@
ifc->arg = er;
- kproc("etherread4", etherread4, ifc);
- kproc("etherread6", etherread6, ifc);
- kproc("recvarpproc", recvarpproc, ifc);
+ kproc("etherread4", etherread4, ifc, 0);
+ kproc("etherread6", etherread6, ifc, 0);
+ kproc("recvarpproc", recvarpproc, ifc, 0);
}
/*
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -476,7 +476,7 @@
return p - buf;
}
-void
+extern void
icmpinit(Fs *fs)
{
Proto *icmp;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -849,7 +849,7 @@
icmpclose(c);
}
-void
+extern void
icmp6init(Fs *fs)
{
Proto *icmp6 = smalloc(sizeof(Proto));
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -1251,7 +1251,7 @@
qlock(&ipriv->apl);
if(ipriv->ackprocstarted == 0){
sprint(kpname, "#I%dilack", c->p->f->dev);
- kproc(kpname, ilackproc, c->p);
+ kproc(kpname, ilackproc, c->p, 0);
ipriv->ackprocstarted = 1;
}
qunlock(&ipriv->apl);
--- a/os/ip/inferno.c
+++ /dev/null
@@ -1,28 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-/*
- * some hacks for commonality twixt inferno and plan9
- */
-
-char*
-commonuser(void)
-{
- return up->user;
-}
-
-char*
-commonerror(void)
-{
- return up->errstr;
-}
-
-int
-bootpread(char*, ulong, int)
-{
- return 0;
-}
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -510,7 +510,7 @@
f = ifc->conv->p->f;
if(waserror()){
wunlock(ifc);
- return up->errstr;
+ return up->env->errstr;
}
if(mtu > 0)
@@ -866,7 +866,7 @@
return ipstats(ipifc->f, buf, len);
}
-void
+extern void
ipifcinit(Fs *f)
{
Proto *ipifc;
@@ -1100,7 +1100,7 @@
qunlock(f->self);
}
-long
+extern long
ipselftabread(Fs *f, char *cp, ulong offset, int n)
{
int i, m, nifc, off;
@@ -1375,7 +1375,7 @@
/*
* find the local address for a remote destination
*/
-void
+extern void
findlocalip(Fs *f, uchar *local, uchar *remote)
{
if(isv4(remote)) {
@@ -1528,7 +1528,7 @@
ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
{
if(waserror()){
- print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+ print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->env->errstr);
return;
}
if(ifc->m != nil && ifc->m->areg != nil)
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -815,7 +815,7 @@
return n;
}
-void
+extern void
ipmuxinit(Fs *f)
{
Proto *ipmux;
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -33,7 +33,7 @@
lb->q = qopen(1024*1024, Qmsg, nil, nil);
ifc->arg = lb;
- kproc("loopbackread", loopbackread, ifc);
+ kproc("loopbackread", loopbackread, ifc, 0);
}
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -55,7 +55,7 @@
ifc->arg = er;
- kproc("netdevread", netdevread, ifc);
+ kproc("netdevread", netdevread, ifc, 0);
}
/*
--- /dev/null
+++ b/os/ip/plan9.c
@@ -1,0 +1,36 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+
+/*
+ * some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+ return up->env->user;
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+ return fdtochan(up->env->fgrp, fd, mode, a, b);
+}
+
+char*
+commonerror(void)
+{
+ return up->env->errstr;
+}
+
+int
+postnote(Proc *p, int, char *, int)
+{
+ swiproc(p, 0);
+ return 0;
+}
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -203,7 +203,7 @@
qlock(&rpriv->apl);
if(rpriv->ackprocstarted == 0){
sprint(kpname, "#I%drudpack", rudp->f->dev);
- kproc(kpname, relackproc, rudp);
+ kproc(kpname, relackproc, rudp, 0);
rpriv->ackprocstarted = 1;
}
qunlock(&rpriv->apl);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -964,7 +964,7 @@
qlock(&tpriv->apl);
if(tpriv->ackprocstarted == 0){
snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
- kproc(kpname, tcpackproc, s->p);
+ kproc(kpname, tcpackproc, s->p, 0);
tpriv->ackprocstarted = 1;
}
qunlock(&tpriv->apl);
@@ -3376,7 +3376,7 @@
tcb->timer.start = x;
}
-void
+extern void
tcpinit(Fs *fs)
{
Proto *tcp;
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -570,7 +570,7 @@
upriv->ustats.udpOutDatagrams);
}
-void
+extern void
udpinit(Fs *fs)
{
Proto *udp;
--- a/os/pc64/mkfile
+++ b/os/pc64/mkfile
@@ -53,7 +53,7 @@
fns.h\
io.h\
-CFLAGS=-wFVT -I$ROOT/Inferno/$OBJTYPE/include -I$ROOT/include -I$ROOT/libinterp -I../port
+CFLAGS=-wFVT -I$ROOT/Inferno/$OBJTYPE/include -I$ROOT/include -I$ROOT/libinterp -I$ROOT/libip -I../port
KERNDATE=`{$NDATE}
default:V: i$CONF
--- a/os/pc64/pc64
+++ b/os/pc64/pc64
@@ -13,9 +13,9 @@
ssl
cap
- ether netif netaux ethermedium
+ ether netif ethermedium
# bridge netif log
- ip bootp ip ipv6 ipaux iproute arp netlog ptclbsum iprouter plan9 nullmedium pktmedium nat
+ ip ip ipv6 ipaux iproute arp chandial netlog plan9 nullmedium pktmedium
draw screen vga vgax cga
# mouse mouse
@@ -55,6 +55,7 @@
mp
math
kern
+ ip
link
## ether82557 pci
--- a/os/port/netaux.c
+++ /dev/null
@@ -1,67 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "../port/netif.h"
-
-
-void
-hnputv(void *p, vlong v)
-{
- uchar *a;
-
- a = p;
- hnputl(a, v>>32);
- hnputl(a+4, v);
-}
-
-void
-hnputl(void *p, ulong v)
-{
- uchar *a;
-
- a = p;
- a[0] = v>>24;
- a[1] = v>>16;
- a[2] = v>>8;
- a[3] = v;
-}
-
-void
-hnputs(void *p, ushort v)
-{
- uchar *a;
-
- a = p;
- a[0] = v>>8;
- a[1] = v;
-}
-
-vlong
-nhgetv(void *p)
-{
- uchar *a;
-
- a = p;
- return ((vlong)nhgetl(a) << 32) | nhgetl(a+4);
-}
-
-ulong
-nhgetl(void *p)
-{
- uchar *a;
-
- a = p;
- return (a[0]<<24)|(a[1]<<16)|(a[2]<<8)|(a[3]<<0);
-}
-
-ushort
-nhgets(void *p)
-{
- uchar *a;
-
- a = p;
- return (a[0]<<8)|(a[1]<<0);
-}
--- a/os/port/netif.h
+++ b/os/port/netif.h
@@ -87,8 +87,8 @@
/* statistics */
int misses;
- int inpackets;
- int outpackets;
+ uvlong inpackets;
+ uvlong outpackets;
int crcs; /* input crc errors */
int oerrs; /* output errors */
int frames; /* framing errors */
@@ -123,6 +123,11 @@
ETHERMINTU = 60, /* minimum transmit size */
ETHERMAXTU = 1514, /* maximum transmit size */
ETHERHDRSIZE = 14, /* size of an ethernet header */
+
+ /* ethernet packet types */
+ ETARP = 0x0806,
+ ETIP4 = 0x0800,
+ ETIP6 = 0x86DD,
};
struct Etherpkt
--- a/os/port/portfns.h
+++ b/os/port/portfns.h
@@ -108,8 +108,8 @@
void gotolabel(Label*);
char* getconfenv(void);
void (*hwrandbuf)(void*, u32);
-void hnputl(void*, ulong);
-void hnputs(void*, ushort);
+void hnputl(void*, u32);
+void hnputs(void*, u16);
Block* iallocb(int);
void iallocsummary(void);
void ilock(Lock*);
@@ -327,9 +327,7 @@
void validaddr(void*, ulong, int);
void* vmemchr(void*, int, int);
-void hnputv(void*, vlong);
-void hnputl(void*, ulong);
-void hnputs(void*, ushort);
-vlong nhgetv(void*);
-ulong nhgetl(void*);
-ushort nhgets(void*);
+void hnputv(void*, u64);
+u64 nhgetv(void*);
+u32 nhgetl(void*);
+u16 nhgets(void*);