ref: c985562065363cb59497579c4c89e283883f5a45
parent: e9887eaf83eea7d530704ab3f1caab08b35af404
author: 9ferno <[email protected]>
date: Sat Oct 22 06:16:36 EDT 2022
more comments
--- a/TODO
+++ b/TODO
@@ -1,3 +1,6 @@
+implement sync
+throttling when the write queue becomes too much
+ show the writer queue when reading /adm/ctl
profiling
read only mode
mafswrite():
--- a/all.h
+++ b/all.h
@@ -53,7 +53,7 @@
struct Iobuf
{
Ref;
- RWLock;
+ RWLock; /* controls access to this Iobuf */
u64 blkno; /* block number on the disk, primary key */
Iobuf *fore; /* for lru */
Iobuf *back; /* for lru */
@@ -61,8 +61,8 @@
u8 *xiobuf; /* "real" buffer pointer */
Content *io; /* cast'able to contents */
};
- u32 dirties;
- u8 tofree;
+ u32 dirties; /* number of dirty blocks yet to be written by the writer */
+ u8 tofree; /* free this Iobuf after the dirty blocks are written to the disk */
};
extern u32 nbuckets; /* n hash buckets for i/o */
--- a/dat.h
+++ b/dat.h
@@ -23,7 +23,6 @@
MiB = KiB*KiB, /* Mibibytes */
GiB = KiB*MiB, /* Gibibytes */
TiB = KiB*GiB, /* Tibibytes */
- Nwriters = 10, /* max. number of writer processes */
/*
https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function
@@ -30,11 +29,14 @@
using small numbers for testing
using Nbuckets=2 for testing.
- Size of buffer cache = Nbuckets * Ncollisions * Rawblocksize
- = 2048 * 10 * 512 bytes = 10MiB
+ Size of buffer cache is approximately (Nbuckets * Ncollisions * Rawblocksize).
+
+ If you have RAM to spare, increase the Nbuckets instead of
+ Ncollisions as the hash index lookup is faster than searching
+ through a linked list.
*/
- Nbuckets = 2048, /* number of Hiob */
- Ncollisions = 10, /* soft limit after which we start reusing older Iobuf's */
+ Nbuckets = 256*KiB, /* number of Hiob */
+ Ncollisions = 4, /* soft limit after which we start reusing older Iobuf's */
/* Qpnone is the Tag.path of free blocks/extents(Tfree),
and zero'ed out dentry blocks */
@@ -69,7 +71,7 @@
enum {
Rawblocksize= 512ULL, /* real block size */
Ndblock = 32, /* number of direct blocks in a Dentry */
- Niblock = 6, /* max depth of indirect blocks */
+ Niblock = 6, /* maximum depth of indirect blocks */
/* global block numbers. The bkp contents locations are calculated by ream() */
Bmagicb = 0, /* block number of first block. Bmagic conflicts with bio.h */
@@ -101,7 +103,6 @@
typedef struct Content Content;
typedef struct Dentry1 Dentry1;
typedef struct Dentry Dentry;
-typedef struct Spanid Spanid;
typedef struct Fblock Fblock;
typedef struct Qid9p1 Qid9p1;
typedef union Rabuf Rabuf;
@@ -163,14 +164,16 @@
{
Qid9p1 qid;
u64 size; /* 0 for directories. For files, size in bytes of the content */
- u64 pdblkno; /* parent dentry absolute block number. Will be 0 for root. */
+ u64 pdblkno; /* block number of the parent directory entry. Will be 0 for root. */
u64 pqpath; /* parent qid.path */
- u64 mtime; /* modified time nano seconds from epoch */
+ u64 mtime; /* modified time in nano seconds from epoch */
u32 mode; /* same bits as defined in lib.h Dir.mode */
s16 uid;
s16 gid;
s16 muid;
- u64 dblocks[Ndblock]; /* direct blocks */
+ u64 dblocks[Ndblock]; /* direct blocks. */
+ /* List of Tdata block numbers for files and
+ Tdentry block numbers for directories */
/* Tag.type = Tdentry for directories and Tdata for files */
u64 iblocks[Niblock]; /* indirect blocks */
};
@@ -184,7 +187,7 @@
*/
enum {
Blocksize = Rawblocksize - sizeof(Tag),
- Namelen = (Blocksize-sizeof(Dentry1)),/* max size of file name components */
+ Namelen = (Blocksize-sizeof(Dentry1)), /* maximum size of the name of a file or directory */
Iounit = MAXRPC, /* in bytes */
@@ -348,17 +351,19 @@
enum
{
Tblank = 0,
+ Tfree = 0, /* free block */
Tnone = 0,
Tmagic, /* the first (zero'th) block holds the magic */
Tdentry, /* directory entry */
/* Tdata & indirect blocks are last, to allow for greater depth */
- Tdata, /* has the file contents */
- Tind0, /* points to Tdata blocks for files or Tdentry blocks for directories. */
- Tind1, /* points to Tind0 blocks */
- Tind2, /* points to Tind1 blocks */
- Tind3, /* list of Tind2 blocks */
- Tind4, /* list of Tind3 blocks */
- Tind5, /* list of Tind4 blocks, maximum file size 26 TiB */
+ Tdata, /* actual file contents */
+ Tind0, /* contains a list of Tdata block numberss for files
+ and Tdentry block numbers for directories.*/
+ Tind1, /* contains a list of Tind0 block numbers */
+ Tind2, /* contains a list of Tind1 block numbers */
+ Tind3, /* contains a list of Tind2 block numbers */
+ Tind4, /* contains a list of Tind3 block numbers */
+ Tind5, /* contains a list of Tind4 block numbers, maximum file size 26 TiB */
Maxtind, /* should be Tind0+Niblock */
/* gap for more indirect block depth in future */
MAXTAG,
--- a/docs/mafs.ms
+++ b/docs/mafs.ms
@@ -105,27 +105,10 @@
"3 " at Block3.w rjust
}
.PE
-.ne 20
.sp
-.nf
-The different types of possible Span's on a disk are:
-enum
-{
- Tfree = 0, /* free blocks */
- Tmagic, /* the first (zero'th) block holds a magic word */
- Tdentry, /* directory entry */
- /* Tind\fIn\fR are last, to allow for future increases */
- Tdata, /* file contents */
- Tind0, /* list of Tdata Spans for files or Tdentry Spans for directories.*/
- Tind1, /* list of Tind0 blocks */
- Tind2, /* list of Tind1 blocks */
- Tind3, /* list of Tind2 blocks */
- Tind4, /* list of Tind3 blocks */
- Tind5, /* list of Tind4 blocks, maximum file size 26 TiB */
-};
-.sp
A block is stored to the disk with a Tag.
.br
+.nf
struct Tag
{
u64 path; /* Qid.path, unique identifier of directory or file */
@@ -136,16 +119,48 @@
};
.fi
.sp
+A block in the process of being written is marked dirty (Tag.dirty = 1). After a successful write, the Tag.dirty is set to 0. This helps identify dirty blocks after a crash.
+.sp
Every file or directory is represented on the disk by a directory entry (Dentry). A directory entry uses a block (Tag.type = Tdentry) and is uniquely identifiable by a Qid.
.sp
+A file stores its contents in blocks with a Tag.type of Tdata. A directory holds the directory entries of it's children in blocks with a Tag.type of Tdentry.
+.sp
+The blocks used by a file or directory entry are listed in their directory entry. As it is not possible to represent big files using a list of blocks, the blocks are structured to use multiple levels of indirection as file size increases.
+.sp
+A file's data blocks are identified by a tag of Tdata and that file's Qid.path. A directory's data blocks are identified by a tag of Tdentry and Qid.path of the child directory entry. (Is this quirky? Should the child's directory entry have a tag o the parent's Qid.path?)
+.sp
+A block number of zero represents the end of the file's contents. If a file is truncated, the data and indirect blocks are given up and the dentry.dblocks[0] = 0.
+.sp
Mafs does not store the last access time of a file or directory.
+.ne 20
.sp
-A Dentry is defined as:
.nf
+The different types of blocks on a disk are:
+.br
+.nf
+enum
+{
+ Tfree = 0, /* free block */
+ Tmagic, /* the first (zero'th) block holds a magic word */
+ Tdentry, /* directory entry */
+ /* Tind\fIn\fR are last, to allow for future increases */
+ Tdata, /* actual file contents */
+ Tind0, /* contains a list of Tdata block numberss for files
+ and Tdentry block numbers for directories.*/
+ Tind1, /* contains a list of Tind0 block numbers */
+ Tind2, /* contains a list of Tind1 block numbers */
+ Tind3, /* contains a list of Tind2 block numbers */
+ Tind4, /* contains a list of Tind3 block numbers */
+ Tind5, /* contains a list of Tind4 block numbers, maximum file size 26 TiB */
+};
+.fi
+.sp
+A directory entry is defined as:
+.nf
enum {
Rawblocksize= 512, /* real block size */
Ndblock = 32,/* number of direct blocks in a Dentry */
- Niblock = 6, /* max depth of indirect blocks */
+ Niblock = 6, /* maximum depth of indirect blocks */
};
struct Qid9p1
{
@@ -152,6 +167,7 @@
u32 version;
u64 path; /* unique identifier */
};
+
struct Dentry1
{
Qid9p1 qid;
@@ -163,21 +179,23 @@
s16 uid;
s16 gid;
s16 muid;
- u64 dblocks[Ndblock]; /* direct blocks */
+ u64 dblocks[Ndblock]; /* direct blocks. */
+ /* List of Tdata block numbers for files and
+ Tdentry block numbers for directories */
/* Tag.type = Tdentry for directories and Tdata for files */
u64 iblocks[Niblock]; /* indirect blocks */
};
/*
- * derived constants
- * Ndentriesperblock: number of Dentry's per block
+ * Derived constants
+ * Ndentryperblock: number of directory entries per block
* Nindperblock: number of block pointers per block
*/
enum {
Blocksize = Rawblocksize - sizeof(Tag),
- Namelen = (Blocksize-sizeof(Dentry1)), /* max size of file name components */
+ Namelen = (Blocksize-sizeof(Dentry1)), /* maximum size of the name of a file or directory */
- Ndentryperblock = 1, /* Blocksize / sizeof(Dentry), */
+ Ndentryperblock = 1, /* Blocksize / sizeof(Dentry), */
Nindperblock = Blocksize / sizeof(u64),
};
struct Dentry
@@ -185,8 +203,14 @@
struct Dentry1;
char name[Namelen];
};
+.fi
.sp
-: mafs ; tests/6.sizes # shows the values of the above derived variables.
+A directory entry once assigned is not given up until the parent directory is removed. It is zero'ed if the directory entry is removed. It is reused by the next directory entry created under that parent directory. This removes the need for garbage collection of directory entries on removals and also avoids zero block numbers in the middle of a directory. A zero block number while traversing a directory entry's dblocks or iblocks represents the end of directory or file contents. When a directory is removed, the parent will have a directory entry with a tag of Tdentry and Qpnone and the rest of the contents set to zero.
+.sp
+A directory's size is always zero.
+.sp
+.nf
+tests/6.sizes # shows the values of the above derived variables.
Namelen 144 Ndblock 32 Niblock 6
Blocksize 502 Nindperblock 62
A Tind0 unit points to 1 data blocks (502 bytes)
@@ -214,10 +238,8 @@
reli start 931151434 max 57731387017
max size 57731387018*Blocksize = 28981156283036 bytes = 26 TiB
.fi
+.ne 30
.sp
-.sp
-.ne 20
-.sp
On an empty mafs filesystem mounted at /n/mafs, the disk contents added by the below commands are:
.nf
mkdir /n/mafs/dir1
@@ -231,7 +253,7 @@
down
{ Bound: box height 10*bigboxht width 3.3*boxwid }
move 0.1i
- box height fieldht invis "Tdentry 1 64"
+ box height fieldht invis "Tdentry 64"
box height fieldht invis "qid.version 0"
box height fieldht invis "qid.path 64"
box height fieldht invis "size 0"
@@ -267,7 +289,7 @@
down
{ Bound: box height 10*bigboxht width 3.3*boxwid }
move 0.1i
- box height fieldht invis "Tdentry 1 65"
+ box height fieldht invis "Tdentry 65"
box height fieldht invis "qid.version 0"
box height fieldht invis "qid.path 65"
box height fieldht invis "size 5"
@@ -301,9 +323,9 @@
.sp
Contents of block 20 are:
.nf
-disk/block tests/test.1/disk 20 | xd -c
-0000000 T d a t a 6 5 \n t e s t \n
-000000e
+disk/block tests/test.1/disk 20
+Tdata 65
+test
.fi
.PS
right
@@ -311,7 +333,7 @@
down
{ Bound: box height 8.5*bigboxht width 3.3*boxwid }
move 0.1i
- box height fieldht invis "Tdentry 1 66"
+ box height fieldht invis "Tdentry 66"
box height fieldht invis "qid.version 0"
box height fieldht invis "qid.path 66"
box height fieldht invis "size 0"
@@ -334,7 +356,7 @@
box height fieldht invis "."
box height fieldht invis " 5 0"
box height fieldht invis "name dir2"
- "Block 21 contents: /dir2 Dentry" at Bound.nw + 0,0.1i ljust
+ "Block 21 contents: /dir2 directory entry" at Bound.nw + 0,0.1i ljust
"Representation of two files in a directory (/dir2/file1 and /dir2/file2)" ljust at Bound.nw + 0.2,0.3i
}
move 4*boxwid
@@ -342,7 +364,7 @@
down
{ Bound: box height 8.5*bigboxht width 3.3*boxwid }
move 0.1i
- box height fieldht invis "Tdentry 1 67"
+ box height fieldht invis "Tdentry 67"
box height fieldht invis "qid.version 0"
box height fieldht invis "qid.path 67"
box height fieldht invis "size 5"
@@ -365,7 +387,7 @@
box height fieldht invis "."
box height fieldht invis " 5 0"
box height fieldht invis "name file1"
- "Block 22 contents: file1 Dentry" at Bound.nw + 0,0.1i ljust
+ "Block 22 contents: file1 directory entry" at Bound.nw + 0,0.1i ljust
}
down
move 9*bigboxht
@@ -373,7 +395,7 @@
down
{ Bound: box height 8.5*bigboxht width 3.3*boxwid }
move 0.1i
- box height fieldht invis "Tdentry 1 68"
+ box height fieldht invis "Tdentry 68"
box height fieldht invis "qid.version 0"
box height fieldht invis "qid.path 68"
box height fieldht invis "size 5"
@@ -396,15 +418,15 @@
box height fieldht invis "."
box height fieldht invis " 5 0"
box height fieldht invis "name file2"
- "Block 24 contents: file2 Dentry" at Bound.nw + 0,0.1i ljust
+ "Block 24 contents: file2 directory entry" at Bound.nw + 0,0.1i ljust
}
.PE
.sp
-iblocks[0] has the block number of a Tind0 block. A Tind0 block has a list of Tdata block numbers for files and Tdentry block numbers for directories.
+iblocks[0] has the block number of a Tind0 block. A Tind0 block contains a list of Tdata block numbers for files or a list of Tdentry block numbers for directories.
.sp
-iblocks[1] contains the block number of a Tind1 block. A Tind1 block has a list of block numbers of Tind0 blocks.
+iblocks[1] has the block number of a Tind1 block. A Tind1 block contains a list of Tind0 block numbers.
.sp
-Similarly, for other iblocks[n] entries, iblocks[n] contains the block number of a Tind\fIn\fR block. A Tind\fIn\fR block has a list of block numbers of Tind\fI(n-1)\fR blocks.
+Similarly, for other iblocks[n] entries, iblocks[n] has the block number of a Tind\fIn\fR block. A Tind\fIn\fR block contains a list of Tind\fI(n-1)\fR block numbers.
.sp
.sp
Relative index
@@ -411,7 +433,7 @@
.sp
The zero'th relative index in a directory entry is the first data block. The next relative index is the second data block of the directory entry, and so on.
.sp
-tests/chkreli.rc tests the translation of a relative index(reli) to an actual disk block number.
+tests/6.reli shows how a relative index (reli) is translated into an actual disk block number.
.sp
To find the actual block number where the first block (zero'th as zero indexed) of a file is stored:
.nf
@@ -562,14 +584,12 @@
}
.PE
.sp
-The Tag.dirty flag is set while a block is being written. This helps identify dirty blocks after a crash.
-.sp
.TS
box;
c s s s
c l c c
l a a a .
-Disk State after ream - System Files
+System Files
=
Block Description Directory entry Content
_
@@ -598,7 +618,7 @@
.TE
.ta 5n 10n 15n 20n 25n 30n 35n 40n 45n 50n 55n 60n 65n 70n 75n 80n
.sp
-The /adm/ctl file is used to halt or sync. /adm/users is a r/w file that will reload users when written to it. The owner of the /adm/ctl file or any user belonging to the sys group can ream the disk.
+The /adm/ctl file is used to halt or sync the file system. /adm/users is a r/w file that will reload users when written to it. The owner of the /adm/ctl file or any user belonging to the sys group can ream the disk.
.sp
There is no /adm/magic as the block number of the magic block is zero and zero block in a directory entry signifies the end of the directory contents.
.sp
@@ -627,14 +647,6 @@
Mafs needs atleast Nminblocks=17 blocks (8.5 KB). The middle block number is Nminblocks + ((nblocks - Nminblocks)/2), where nblocks = total number of blocks.
.fi
.sp
-A directory entry once assigned is not given up until the parent directory is removed. It is zero'ed if the directory entry is removed. It is reused by the next directory entry created under that parent directory. This removes the need for garbage collection of directory entries on removals and also avoids zero block numbers in the middle of a directory. A zero block number while traversing a directory's dspanids or iblocks represents the end of directory or file contents. When a directory is removed, the parent will have a directory entry with a tag of Tdentry and Qpnone and the rest of the contents set to zero.
-.sp
-A directory's size is always zero.
-.sp
-A file's data blocks are identified by a tag of Tdata and Qid.path. A block number of zero represents the end of the file's contents. If a file is truncated, the data and indirect blocks are given up and the dentry.dblocks[0] = 0.
-.sp
-The directory entry is locked with a read-write lock (RWlock) for any file operations. This ensures synchronization across multiple processes updating the same file.
-.sp
kfs and cwfs use 8192 byte blocks. Hence, they store multiple directory entries (Dentry) per block. They use slot numbers to identify a particular directory entry in a block of directory entries. Mafs avoids that be using 512 byte blocks thus having only one directory entry per block. This avoids locking up other sibling directory entries on access.
.sp
.sp
@@ -644,9 +656,11 @@
.sp
An Iobuf is used to represent a block in memory. An Iobuf is unique to a block. All disk interaction, except for free block management, happens through an Iobuf. We read a block from the disk into an Iobuf. To update a block on the disk, we write to an Iobuf, which, in-turn gets written to the disk.
.sp
+An Iobuf is protected by a read-write lock (RWlock). This ensures synchronization across multiple processes updating the same file.
+.sp
getbuf(), putbuf() and putbuffree() are used to manage Iobuf's. The contents of an Iobuf is not touched unless it is locked between getbuf(), putbuf() and putbuffree() calls.
.sp
-allocblock() allocates free blocks into an Iobuf.
+allocblock() allocates a free block into an Iobuf.
.sp
freeblock() erases the Iobuf and returns the block to the free block management routines.
.sp
@@ -656,27 +670,27 @@
struct Hiob /* Hash bucket */
{
Iobuf* link; /* least recently used Iobuf in the circular linked list */
- QLock;
+ QLock; /* controls access to this hash bucket */
};
struct Iobuf
{
Ref;
- RWLock;
- u64 blkno; /* block number on the disk, primary key */
- Iobuf *fore; /* for lru */
- Iobuf *back; /* for lru */
+ RWLock; /* controls access to this Iobuf */
+ u64 blkno; /* block number on the disk, primary key */
+ Iobuf *fore; /* for lru */
+ Iobuf *back; /* for lru */
union{
u8 *xiobuf; /* "real" buffer pointer */
Content *io; /* cast'able to contents */
- }
- u32 dirties;
- u8 tofree;
+ };
+ u32 dirties; /* number of dirty blocks yet to be written by the writer */
+ u8 tofree; /* free this Iobuf after the dirty blocks are written to the disk */
};
.fi
.sp
-The Iobuf's are arranged into a hash structure of Nbuckets. Each bucket has a circular linked list of Ncollisions' Iobuf's to handle collisions. If all the Iobuf's in the circular linked list are locked, new Iobuf's are added to this linked list. This circular list is ordered on a least recently used basis. Iobuf's once added to this list are not removed. When an Iobuf is not in the list, the oldest unlocked Iobuf is reused.
+The Iobuf's are arranged into a list of hash buckets. Each bucket points a circular linked list of Iobuf's to handle collisions. If all the Iobuf's in the circular linked list are locked, new Iobuf's are added to this linked list. This circular list is ordered on a least recently used basis. Iobuf's once added to this list are not removed. When an Iobuf is not in the list, the oldest unlocked Iobuf is reused.
.sp
-Hiob hiob[nbuckets] is a valid representation of the list of hash buckets. The block number of the Span is hashed to arrive at the relevant hash bucket index.
+Hiob hiob[nbuckets] is a valid representation of the list of hash buckets. The block number is hashed to arrive at the relevant hash bucket index.
.sp
hiob[hash(block number)].link = Address of Iobuf0, where Iobuf0 is the least recently used Iobuf.
.PS
@@ -710,8 +724,10 @@
arrow dashed from Iobufn2.ne - 0.05i,0 to Iobuf2.se - 0.05i,0
.PE
.sp
-The size of the buffer cache is approximately: number of hash buckets * collisions per hash bucket * Maximum Span size. By default, the approximate size of the buffer cache = Nbuckets * Ncollisions * Maxspansize = 256 * 10 * 1MB = 2.56GB. The -h parameter can be used to change the number of hash buckets.
+The size of the buffer cache is: number of hash buckets * collisions per hash bucket * block size. The approximate size of the buffer cache = Nbuckets * Ncollisions * Rawblocksize = 256 * 10 * 512 bytes = 1.28GiB. The -h parameter can be used to change the number of hash buckets.
.sp
+If you have RAM to spare, increase Nbuckets instead of Ncollisions as the hash index lookup is faster than searching through a linked list.
+.sp
Iobuf.Ref is used to avoid locking up the hash bucket when a process is waiting for a lock on an Iobuf in that hash bucket.
.sp
Iobuf.Ref ensures that an Iobuf is not stolen before another process can get to wlock()'ing it after letting go of the lock on the hash bucket. We cannot hold the lock on the hash bucket until we wlock() the iobuf as that blocks other processes from using the hash bucket. This could also result in a deadlock. For example, the directory entry is block 18, which hashes to a hash index of 7. A writer() locked the directory entry iobuf and wants to add a data block 84 to the directory entry. Block 84 hashes to the same hash index of 7. Another process wanting to access the directory entry is waiting for a lock on that io buffer. While doing so, it has locked the hash bucket. Now, this has caused a deadlock between both these processes. The first process cannot proceed until it can lock the hash bucket holding block 84 and is still holding the lock on the directory entry in block 18. The second process cannot lock block 18 and is holding the lock on the hash bucket.
@@ -737,13 +753,10 @@
.sp
The blocks to be written to a disk are stored to a linked list represented by:
.nf
-typedef struct Dirties Dirties;
-typedef struct Wbuf Wbuf;
-
struct Dirties
{
- QLock lck;
- Wbuf *head, *tail;
+ QLock lck; /* controls access to this queue */
+ Wbuf *head, *tail; /* linked list of dirty blocks yet to be written to the disk */
s32 n;
Rendez isempty;
} drts = {0};
@@ -750,10 +763,9 @@
struct Wbuf
{
- u64 blkno; /* block number on the disk, primary key */
- u16 len; /* number of blocks of data xiobuf points to */
+ u64 blkno; /* block number on the disk, primary key */
Wbuf *prev, *next;
- Iobuf *iobuf;
+ Iobuf *iobuf; /* pointer to the used Iobuf in the buffer cache */
union{
u8 payload; /* "real" contents */
Content io; /* cast'able to contents */
@@ -763,7 +775,7 @@
.sp
A writer process takes the blocks from the Dirties linked list on a FIFO (first-in-first-out) basis and writes them to the disk. putbuf() adds blocks to the end of this linked list.
.sp
-The dirty blocks not yet written to the disk remain in the buffer cache and cannot be stolen when a need for new Iobuf's arises.
+The dirty blocks not yet written to the disk remain in the buffer cache and cannot be stolen when a need for new Iobuf arises.
.sp
Free'd blocks are not written to the disk to avoid writing blanks to a disk.
.sp
@@ -775,9 +787,9 @@
.sp
Free blocks are managed using Extents. The list of free blocks is stored to the disk when shutting down. If this state is not written, then the file system needs to be checked and the list of free blocks should be updated.
.sp
-When shutting down, the Extents are written to free blocks. This information is written to /adm/frees. Also, fsok in the super block is set to 1. When fsok = 0, run an fsck (filesystem checker) to correct the issue.
+When shutting down, the Extents are written to free blocks. This information is written to /adm/frees. Also, fsok in the super block is set to 1. When fsok = 0, run an fsck (filesystem checker) to correct any inconsistencies on the disk.
.sp
-A tag of Tfree and Qpnone represents a free block. If a directory entry is removed, the parent will have a zero'ed out child directory entry (Qid.path = 0) and a tag of Tdentry and Qpnone.
+A tag of Tfree and Qpnone represent a free block. If a directory entry is removed, the parent will have a zero'ed out child directory entry (Qid.path = 0) and a tag of Tdentry and Qpnone.
.sp
.ne 14
Algorithm to allocate blocks from Extents:
@@ -792,8 +804,8 @@
.sp
.nf
struct Extent {
- struct Extent *low, *high; /* sorted by blkno */
- struct Extent *small, *big;/* sorted by len */
+ struct Extent *low, *high; /* sorted by block number */
+ struct Extent *small, *big;/* sorted by the number of blocks in this extent */
u64 blkno; /* block number */
u64 len; /* in blocks */
};
@@ -804,7 +816,7 @@
};
.fi
.sp
-allocblocks() and freeblocks() use balloc() and bfree(). balloc() assigns blocks and bfree() holds them for next allocation.
+allocblock() and freeblock() use balloc() and bfree() respectively. balloc() assigns blocks from an extent and bfree() adds them to an extent for next allocation.
.sp
.PS
# define field { [right; box invis $1 ljust; box invis $2 rjust; down] }
@@ -888,7 +900,6 @@
down
arrowwid=0.15
arrowht=0.15
- arrowhead=7
arrow 0.25i at $1
}
right
@@ -1096,6 +1107,18 @@
.sp
.TS
allbox;
+c l
+l a .
+Program Description
+_
+disk/mafs Start mafs on a disk
+disk/free List the free blocks
+disk/used List the used blocks
+disk/block Show the contents of a block
+.TE
+.sp
+.TS
+allbox;
c l r
l a r .
File Description chatty9p
@@ -1171,6 +1194,33 @@
mount -c /srv/mafs_sdF1 /n/mafs_sdF1
.fi
.sp
+Starting mafs on a 2MB byte file. The below commands create a disk.file to use as a disk. Mount /n/mafs_disk.file for the file system.
+.nf
+.sp
+ dd -if /dev/zero -of disk.file -bs 512 -count 4096;
+.br
+ mount -c <{disk/mafs -s -r mafs_disk.file -m 1 -n mafs_disk.file \\
+ <[0=1]} /n/mafs_disk.file
+.fi
+.sp
+Starting mafs on a RAM file. The below commands create a ramfs filesystem to use as a disk.
+.nf
+.sp
+ ramfs -m /n/mafs_ramfs
+.br
+ touch /n/mafs_ramfs/file
+.br
+ dd -if /dev/zero -of /n/mafs_ramfs/file -count 700 -bs 1m
+.br
+ disk/mafs -r mafs_ramfs_file /n/mafs_ramfs/file
+.br
+ mount -c /srv/mafs_ramfs_file /n/mafs_ramfs_file
+.fi
+.sp
+Sync Mafs. This command does not return until all the writes are written to the disk. So, could take a long time if you have a long writer queue.
+.sp
+ echo sync >> /n/mafs_myservice/adm/ctl
+.sp
Stop Mafs.
.sp
echo halt >> /n/mafs_myservice/adm/ctl
@@ -1194,50 +1244,14 @@
-F <{disk/free tests/test.0/disk} 32
.fi
.sp
-Find traverses the heirarchy and identifies the file that the block number belongs to.
+.ne 3
+Find traverses the heirarchy and identifies the file that a block number belongs to.
.sp
disk/find disk.file blocknumber
.sp
.ti 0
.sp
-Starting mafs on a 2MB byte file. The above commands will create a disk.file to use as a disk. Mount /n/mafs_disk.file for the file system.
-.nf
.sp
- dd -if /dev/zero -of disk.file -bs 512 -count 4096;
-.br
- mount -c <{disk/mafs -s -r mafs_disk.file -m 1 -n mafs_disk.file \\
- <[0=1]} /n/mafs_disk.file
-.fi
-.sp
-tests/sizes.c shows the maximum file size representable by a Dentry.
-.br
-.nf
-.in 3n
-: tests ; ./6.sizes
-Namelen 174 Ndspanid 24 Niblock 4
-Blocksize 500 Nspanidperblock 50 Nindperblock 62
-Maxspanlen 2048 Maxspansize 1048564
-A Tind0 unit points to 1 data spans (1048564 bytes)
- block points to 50 data spans
-A Tind1 unit points to 50 data spans (52428200 bytes)
- block points to 3100 data spans
-A Tind2 unit points to 3100 data spans (3250548400 bytes)
- block points to 192200 data spans
-A Tind3 unit points to 192200 data spans (201534000800 bytes)
- block points to 11916400 data spans
-sizeof(Dentry1) 326 Namelen 174
-maxsize direct blocks max 24
-maxsize Tind0 50 max 74
-maxsize Tind1 3100 max 3174
-maxsize Tind2 192200 max 195374
-maxsize Tind3 11916400 max 12111774
-maximum possible spans 12111774
- (12111774*Maxspansize = 12699970192536 bytes)
- (12111774*Maxspansize = 12699970192536 bytes = 11 TB)
-.fi
-.in 0
-.sp
-.sp
.ne 6
.ft B
Tests
@@ -1286,9 +1300,9 @@
_
tests/test.0 empty disk
tests/test.1 create a file /dir1/file1 and echo test into it
-tests/test.2 writes at different offsets to a file and then removing the file
+tests/test.2 writes at different offsets to a file and then removes the file
_
-tests/test.3 write, read and deletion of files with sizes upto 16384 blocks
+tests/test.3 write, read and delete files with sizes upto 16384 blocks
tests/test.4 directory copy
tests/test.5 fcp gzipped files
_
@@ -1320,6 +1334,7 @@
tests/extents/mergeprevious Figure 6 of the Extents section
.TE
.sp
+.ne 3
.nf
To loop through all the blocks of a test:
.in 3n
binary files a/docs/mafs.pdf b/docs/mafs.pdf differ
--- a/docs/mkfile
+++ b/docs/mkfile
@@ -5,6 +5,7 @@
# fn v { mk view }
view: mafs.pdf
window -r 0 0 800 1000 page -R /mnt/term/home/j/local/plan9/custom/mafs/docs/mafs.pdf
+ cp /mnt/term/home/j/local/plan9/custom/mafs/docs/mafs.pdf /mnt/term/tmp/mafs.pdf
clean:
rm *.ps *.pdf
--- a/extents.h
+++ b/extents.h
@@ -12,8 +12,8 @@
2. If no extent of the len we need is available, then break up the smallest extent.
*/
struct Extent {
- struct Extent *low, *high; /* sorted by blkno */
- struct Extent *small, *big; /* sorted by len */
+ struct Extent *low, *high; /* sorted by block number */
+ struct Extent *small, *big;/* sorted by the number of blocks in this extent */
u64 blkno; /* block number */
u64 len; /* in blocks */
};
--- a/writer.c
+++ b/writer.c
@@ -17,8 +17,8 @@
struct Dirties
{
- QLock lck;
- Wbuf *head, *tail;
+ QLock lck; /* controls access to this queue */
+ Wbuf *head, *tail; /* linked list of dirty blocks yet to be written to the disk */
s32 n;
Rendez isempty;
} drts = {0};
@@ -26,9 +26,8 @@
struct Wbuf
{
u64 blkno; /* block number on the disk, primary key */
- u16 len; /* number of blocks of data xiobuf points to */
Wbuf *prev, *next;
- Iobuf *iobuf;
+ Iobuf *iobuf; /* pointer to the used Iobuf in the buffer cache */
union{
u8 payload; /* "real" contents */
Content io; /* cast'able to contents */