jreadv_const.patch
  The vector argument to jreadv() must be const.

fix_commit_locking.patch
  Fix partial unlocking in jtrans_commit().

free_tid_race.patch
  Fix a very important race in the transaction ID freeing path.

tid_is_unsigned.patch
  Make get_jtfile() accept unsigned tids.

tid_assignment_doc.patch
  Add a small document describing how transaction IDs are assigned.

preloader.patch
  Add a preloader DSO.

jopen_readonly.patch
  Allow opening files read-only.

add_ro_constant.patch
  Add J_RDONLY constant to Python binding.

trans_add_errors.patch
  Raise an exception if trans.add() fails.

preload_unlink.patch
  Add unlink() to the preload library.

version-0.21.patch
  Version 0.21



 Make.conf                         |    2 
 Makefile                          |   29 +-
 bindings/preload/libjio_preload.c |  480 ++++++++++++++++++++++++++++++++++++++
 bindings/python/libjio.c          |    3 
 common.c                          |    4 
 common.h                          |    2 
 doc/tids                          |   83 ++++++
 libjio.h                          |    3 
 trans.c                           |   82 ++++--
 unix.c                            |    2 
 10 files changed, 649 insertions(+), 41 deletions(-)


unchanged:
--- cur/unix.c~jreadv_const	2004-09-26 15:34:11.012735592 -0300
+++ cur-root/unix.c	2004-09-26 15:34:22.847936368 -0300
@@ -54,7 +54,7 @@ ssize_t jpread(struct jfs *fs, void *buf
 }
 
 /* readv wrapper */
-ssize_t jreadv(struct jfs *fs, struct iovec *vector, int count)
+ssize_t jreadv(struct jfs *fs, const struct iovec *vector, int count)
 {
 	int rv, i;
 	size_t sum;
unchanged:
--- cur-root/libjio.h	2004-09-26 15:34:38.143611072 -0300
+++ cur-root/libjio.h	2004-10-10 20:58:49.755103608 -0300
@@ -120,7 +120,7 @@
 /* UNIX API wrappers */
 ssize_t jread(struct jfs *fs, void *buf, size_t count);
 ssize_t jpread(struct jfs *fs, void *buf, size_t count, off_t offset);
-ssize_t jreadv(struct jfs *fs, struct iovec *vector, int count);
+ssize_t jreadv(struct jfs *fs, const struct iovec *vector, int count);
 ssize_t jwrite(struct jfs *fs, const void *buf, size_t count);
 ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset);
 ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count);
@@ -150,6 +150,7 @@
 #define J_COMMITTED	8	/* mark a transaction as committed */
 #define J_ROLLBACKED	16	/* mark a transaction as rollbacked */
 #define J_ROLLBACKING	32	/* mark a transaction as rollbacking */
+#define J_RDONLY	64	/* mark a file as read-only */
 
 /* disk constants */
 #define J_DISKHEADSIZE	 12	/* length of disk_header */
unchanged:
--- cur-root/trans.c	2004-10-03 12:46:15.414213920 -0300
+++ cur-root/trans.c	2004-10-10 20:58:49.755103608 -0300
@@ -63,10 +63,9 @@
 	/* read the current max. curid */
 	curid = *(fs->jmap);
 
-	if (tid < curid) {
-		/* we're not freeing the max. curid, so we just return */
-		goto exit;
-	} else {
+	/* if we're the max tid, scan the directory looking for the new max;
+	 * the detailed description can be found in the "doc/" dir */
+	if (tid == curid) {
 		/* look up the new max. */
 		for (i = curid - 1; i > 0; i--) {
 			/* this can fail if we're low on mem, but we don't
@@ -83,7 +82,6 @@
 		*(fs->jmap) = i;
 	}
 
-exit:
 	plockf(fs->jfd, F_UNLOCK, 0, 0);
 	return;
 }
@@ -140,9 +138,16 @@
 {
 	struct joper *jop, *tmpop;
 
+	pthread_mutex_lock(&(ts->lock));
+
+	/* fail for read-only accesses */
+	if (ts->flags & J_RDONLY) {
+		pthread_mutex_unlock(&(ts->lock));
+		return 0;
+	}
+
 	/* find the last operation in the transaction and create a new one at
 	 * the end */
-	pthread_mutex_lock(&(ts->lock));
 	if (ts->op == NULL) {
 		ts->op = malloc(sizeof(struct joper));
 		if (ts->op == NULL) {
@@ -203,6 +208,10 @@
 	ts->flags = ts->flags & ~J_COMMITTED;
 	ts->flags = ts->flags & ~J_ROLLBACKED;
 
+	/* fail for read-only accesses */
+	if (ts->flags & J_RDONLY)
+		goto exit;
+
 	name = (char *) malloc(PATH_MAX);
 	if (name == NULL)
 		goto exit;
@@ -353,10 +362,6 @@
 	written = 0;
 	for (op = ts->op; op != NULL; op = op->next) {
 		rv = spwrite(ts->fs->fd, op->buf, op->len, op->offset);
-
-		plockf(ts->fs->fd, F_UNLOCK, op->offset, op->len);
-		op->locked = 0;
-
 		if (rv != op->len)
 			goto rollback_exit;
 
@@ -415,9 +420,17 @@
 	}
 
 	close(fd);
-	for (op = ts->op; op != NULL; op = op->next) {
-		if (op->locked)
-			plockf(ts->fs->fd, F_UNLOCK, op->offset, op->len);
+
+	/* always unlock everything at the end; otherwise we could have
+	 * half-overlapping transactions applying simultaneously, and if
+	 * anything goes wrong it's possible to break consistency */
+	if (!(ts->flags & J_NOLOCK)) {
+		for (op = ts->op; op != NULL; op = op->next) {
+			if (op->locked) {
+				plockf(ts->fs->fd, F_UNLOCK,
+						op->offset, op->len);
+			}
+		}
 	}
 
 exit:
@@ -523,12 +536,18 @@
 	fs->jdirfd = -1;
 	fs->jmap = MAP_FAILED;
 
-	/* we always need read and write access, because when we commit a
-	 * transaction we read the current contents before applying, and write
-	 * access is needed for locking with fcntl */
-	flags = flags & ~O_WRONLY;
-	flags = flags & ~O_RDONLY;
-	flags = flags | O_RDWR;
+	/* we provide either read-only or read-write access, because when we
+	 * commit a transaction we read the current contents before applying,
+	 * and write access is needed for locking with fcntl; the test is done
+	 * this way because O_RDONLY is usually 0, so "if (flags & O_RDONLY)"
+	 * will fail. */
+	if ((flags & O_WRONLY) || (flags & O_RDWR)) {
+		flags = flags & ~O_WRONLY;
+		flags = flags & ~O_RDONLY;
+		flags = flags | O_RDWR;
+	} else {
+		jflags = jflags | J_RDONLY;
+	}
 
 	fs->name = strdup(name);
 	fs->flags = jflags;
@@ -557,6 +576,11 @@
 	if (fs->fd < 0)
 		goto error_exit;
 
+	/* nothing else to do if read-only; O_RDONLY is 0, so test J_RDONLY */
+	if (jflags & J_RDONLY) {
+		return fs->fd;
+	}
+
 	if (!get_jdir(name, jdir))
 		goto error_exit;
 	rv = mkdir(jdir, 0750);
@@ -643,20 +667,22 @@
 
 	ret = 0;
 
-	if (jsync(fs))
-		ret = -1;
+	if (! (fs->flags & J_RDONLY)) {
+		if (jsync(fs))
+			ret = -1;
+		if (fs->jfd < 0 || close(fs->jfd))
+			ret = -1;
+		if (fs->jdirfd < 0 || close(fs->jdirfd))
+			ret = -1;
+		if (fs->jmap != MAP_FAILED)
+			munmap(fs->jmap, sizeof(unsigned int));
+	}
+
 	if (fs->fd < 0 || close(fs->fd))
 		ret = -1;
-	if (fs->jfd < 0 || close(fs->jfd))
-		ret = -1;
-	if (fs->jdirfd < 0 || close(fs->jdirfd))
-		ret = -1;
 	if (fs->name)
 		/* allocated by strdup() in jopen() */
 		free(fs->name);
-	if (fs->jmap != MAP_FAILED)
-		munmap(fs->jmap, sizeof(unsigned int));
-
 	pthread_mutex_destroy(&(fs->lock));
 
 	return ret;
unchanged:
--- cur/common.c~tid_is_unsigned	2004-10-04 12:24:37.891332208 -0300
+++ cur-root/common.c	2004-10-04 12:24:54.867751400 -0300
@@ -126,7 +126,7 @@ int get_jdir(const char *filename, char 
 }
 
 /* build the filename of a given transaction */
-int get_jtfile(const char *filename, int tid, char *jtfile)
+int get_jtfile(const char *filename, unsigned int tid, char *jtfile)
 {
 	char *base, *baset;
 	char *dir, *dirt;
@@ -141,7 +141,7 @@ int get_jtfile(const char *filename, int
 		return 0;
 	dir = dirname(dirt);
 
-	snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%d", dir, base, tid);
+	snprintf(jtfile, PATH_MAX, "%s/.%s.jio/%u", dir, base, tid);
 
 	free(baset);
 	free(dirt);
unchanged:
--- cur/common.h~tid_is_unsigned	2004-10-04 12:25:05.251172880 -0300
+++ cur-root/common.h	2004-10-04 12:25:15.631594816 -0300
@@ -30,7 +30,7 @@ off_t plockf(int fd, int cmd, off_t offs
 ssize_t spread(int fd, void *buf, size_t count, off_t offset);
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset);
 int get_jdir(const char *filename, char *jdir);
-int get_jtfile(const char *filename, int tid, char *jtfile);
+int get_jtfile(const char *filename, unsigned int tid, char *jtfile);
 
 int checksum(int fd, size_t len, uint32_t *csum);
 uint32_t checksum_map(uint8_t *map, size_t count);
unchanged:
--- /dev/null	2004-04-13 23:59:22.000000000 -0300
+++ cur-root/doc/tids	2004-10-04 12:30:34.000000000 -0300
@@ -0,0 +1,83 @@
+
+Transaction ID assignment procedure
+Alberto Bertogli (albertogli@telpin.com.ar)
+4/October/2004
+---------------------------------------------
+
+This brief document describes how libjio assigns a unique number to each
+transaction that identifies it univocally during its lifetime.
+
+It is a very delicate issue, because the rest of the library depends on the
+uniqueness of the ID; it has to be coherent across threads and processes; and
+it can't take long: it serializes transaction creation (and it's the only
+contention point for independent non-overlapping transactions).
+
+
+Description
+-----------
+
+We have two functions: get_tid() and free_tid(), which return a new
+transaction ID, and mark a given transaction ID as no longer in use,
+respectively.
+
+The main piece of the mechanism is the lockfile: a file named "lock" which
+holds the maximum transaction ID in use. This file gets opened and mmap()'ed
+for faster use inside jopen(). That way, we can treat it directly as an
+integer holding the max tid.
+
+To avoid parallel modifications, we will always lock the file with fcntl()
+before accessing it.
+
+Let's begin by describing how get_tid() works, because it's quite simple: it
+locks the lockfile, gets the max tid, adds 1 to it, unlocks the file and returns
+that value. That way, the new tid is always the new max, and with the locking
+we can be sure it's impossible to assign the same tid to two different
+transactions.
+
+After a tid has been assigned, the commit process will create a file named
+after it inside the journal directory. Then, it will operate on that file all
+it wants, and when the moment comes, the transaction is no longer needed and
+has to be freed.
+
+The first thing we do is to unlink that transaction file. And then, we call
+free_tid(), which will update the lockfile to represent the new max tid, in
+case it has changed.
+
+free_tid() begins by checking whether the transaction we're freeing is the
+greatest, and if not, just returns.
+
+But if it is, we need to find out the new max tid. We do it by "walking" the
+journal directory looking for the file with the greatest number, and that's
+our new max tid. If there are no files, we use 0.
+
+
+Things to notice
+----------------
+
+The following is a list of small things to notice about the mechanism. They're
+useful because races tend to be subtle, and I _will_ forget about them. The
+descriptions are not really detailed, just enough to give a general idea.
+
+
+* It is possible that we get in free_tid() and the transaction we want to free
+is greater than the max tid. In that case, we do nothing: it's a valid
+situation. How to get there: two threads about to free two tids. The first one
+calls unlink() and just after its return (before it gets a chance to call
+free_tid()), another thread, the holder of the current max, steps in and
+performs both the unlink() and free_tid(), which would force a lookup to find
+a new tid, and as in the first thread we have removed the file, the max tid
+could be lower (in particular, it could be 0). This is why we only test for
+equality.
+
+* Unlink after free_tid() is not desirable: in that case, it'd be normal for
+the tid to increment even if we have only one thread writing. It overflows
+quite easily.
+
+* The fact that new tids are always bigger than the current max is not only
+because the code is cleaner and faster: that way when recovering we know the
+order to apply transactions. A nice catch: this doesn't matter if we're
+working with non-overlapping transactions, but if they overlap, we know that
+it's impossible that transaction A and B (B gets committed after A) get
+applied in the wrong order, because B will only begin to commit _after_ A has
+been worked on.
+
unchanged:
--- cur-root/bindings/preload/libjio_preload.c	2004-10-10 20:58:30.301061072 -0300
+++ cur-root/bindings/preload/libjio_preload.c	2004-10-10 20:59:06.306587400 -0300
@@ -0,0 +1,480 @@
+
+/*
+ * libjio C preloader
+ * Alberto Bertogli (albertogli@telpin.com.ar)
+ *
+ * This generates a shared object that, when preloaded, can be used to make an
+ * existing application use libjio for UNIX I/O.
+ * It's not nice or pretty, and does some nasty tricks to work both with and
+ * without LFS. I don't think it builds or works without glibc.
+ */
+
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <dlfcn.h>
+
+/* we don't build this with LFS, however, it's essential that the proper
+ * environment is set for libjio's loading; otherwise we would mess the ABI
+ * up */
+typedef long long off64_t;
+#define _FILE_OFFSET_BITS 64
+#define off_t off64_t
+#include <libjio.h>
+#undef off_t
+#undef _FILE_OFFSET_BITS
+
+
+/* maximum number of simultaneous open file descriptors we support */
+#define MAXFD (4096 * 2)
+
+/* recursion counter, per-thread */
+static int __thread called = 0;
+
+
+/* C library functions, filled via the dynamic loader */
+static void *libc;
+
+static int (*c_open)(const char *pathname, int flags, mode_t mode);
+static int (*c_open64)(const char *pathname, int flags, mode_t mode);
+static int (*c_close)(int fd);
+static int (*c_unlink)(const char *pathname);
+static ssize_t (*c_read)(int fd, void *buf, size_t count);
+static ssize_t (*c_pread)(int fd, void *buf, size_t count, off_t offset);
+static ssize_t (*c_pread64)(int fd, void *buf, size_t count, off64_t offset);
+static ssize_t (*c_readv)(int fd, const struct iovec *vector, int count);
+static ssize_t (*c_write)(int fd, const void *buf, size_t count);
+static ssize_t (*c_pwrite)(int fd, const void *buf, size_t count, off_t offset);
+static ssize_t (*c_pwrite64)(int fd, const void *buf, size_t count, off64_t offset);
+static ssize_t (*c_writev)(int fd, const struct iovec *vector, int count);
+static int (*c_ftruncate)(int fd, off_t length);
+static int (*c_ftruncate64)(int fd, off64_t length);
+static off_t (*c_lseek)(int fd, off_t offset, int whence);
+static off64_t (*c_lseek64)(int fd, off64_t offset, int whence);
+static int (*c_fsync)(int fd);
+
+
+/* file descriptor table, to translate fds to jfs */
+struct fd_entry {
+	int fd;
+	struct jfs *fs;
+	pthread_mutex_t lock;
+};
+static struct fd_entry fd_table[MAXFD];
+
+/* useful macros, mostly for debugging purposes */
+#if 1
+	#define rec_inc() do { called++; } while(0)
+	#define rec_dec() do { called--; } while(0)
+	#define printd(...) do { } while(0)
+
+#else
+	/* debug variants */
+
+	#define rec_inc()				\
+		do {					\
+			called++;			\
+			fprintf(stderr, "I: %d\n", called); \
+			fflush(stderr);			\
+		} while (0)
+
+	#define rec_dec()				\
+		do {					\
+			called--;			\
+			fprintf(stderr, "D: %d\n", called); \
+			fflush(stderr);			\
+		} while (0)
+
+	#define printd(...)				\
+		do {					\
+			if (called)			\
+				fprintf(stderr, "\t");	\
+			called++;			\
+			fprintf(stderr, "%s(): ", __FUNCTION__ ); \
+			fprintf(stderr, __VA_ARGS__);	\
+			fflush(stderr);			\
+			called--;			\
+		} while(0)
+#endif
+
+
+/* functions used to lock fds from the table; they do boundary checks so we
+ * catch out of bounds accesses */
+static inline int fd_lock(int fd)
+{
+	if (fd < 0 || fd >= MAXFD) {
+		printd("locking out of bounds fd %d\n", fd);
+		return 0;
+	}
+	//printd("L %d\n", fd);
+	pthread_mutex_lock(&(fd_table[fd].lock));
+	//printd("OK %d\n", fd);
+	return 1;
+}
+
+static inline int fd_unlock(int fd)
+{
+	if (fd < 0 || fd >= MAXFD) {
+		printd("unlocking out of bounds fd %d\n", fd);
+		return 0;
+	}
+	//printd("U %d\n", fd);
+	pthread_mutex_unlock(&(fd_table[fd].lock));
+	//printd("OK %d\n", fd);
+	return 1;
+}
+
+
+/*
+ * library initialization
+ */
+
+static int __attribute__((constructor)) init(void)
+{
+	int i;
+	pthread_mutexattr_t attr;
+
+	printd("starting\n");
+
+	/* initialize fd_table */
+	pthread_mutexattr_init(&attr);
+	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
+	for (i = 0; i < MAXFD; i++) {
+		fd_table[i].fd = -1;
+		fd_table[i].fs = NULL;
+		pthread_mutex_init(&(fd_table[i].lock), &attr);
+	}
+	pthread_mutexattr_destroy(&attr);
+
+	/* dynamically load the C library */
+	libc = dlopen("libc.so.6", RTLD_NOW);
+	if (libc == NULL) {
+		printd("Error loading libc: %s\n", dlerror());
+		return 0;
+	}
+
+	/* load symbols from the C library */
+	#define libc_load(F) c_##F = dlsym(libc, #F)
+	libc_load(open);
+	libc_load(open64);
+	libc_load(close);
+	libc_load(unlink);
+	libc_load(read);
+	libc_load(pread);
+	libc_load(pread64);
+	libc_load(readv);
+	libc_load(write);
+	libc_load(pwrite);
+	libc_load(pwrite64);
+	libc_load(writev);
+	libc_load(ftruncate);
+	libc_load(ftruncate64);
+	libc_load(lseek);
+	libc_load(lseek64);
+	libc_load(fsync);
+
+	printd("done\n");
+	return 1;
+}
+
+/*
+ * wrappers
+ */
+
+/* open() replacement: uses jopen() unless recursing or on special files */
+int open(const char *pathname, int flags, ...)
+{
+	int r, fd;
+	struct jfs *fs;
+	mode_t mode;
+	struct stat st;
+	va_list l;
+
+	if (flags & O_CREAT) {
+		va_start(l, flags);
+		mode = va_arg(l, mode_t);
+		va_end(l);
+	} else {
+		/* set it to 0, it's ignored anyway */
+		mode = 0;
+	}
+
+	if (called) {
+		printd("orig (r)\n");
+		return (*c_open)(pathname, flags, mode);
+	}
+	printd("libjio\n");
+
+	/* skip special files */
+	r = stat(pathname, &st);
+	if (r == 0 && ( S_ISDIR(st.st_mode) \
+			|| S_ISCHR(st.st_mode) \
+			|| S_ISFIFO(st.st_mode) ) ) {
+		printd("orig (s)\n");
+		return (*c_open)(pathname, flags, mode);
+	}
+
+	/* skip /proc and /sys (not /dev, the problematic files are taken care
+	 * of with the stat test above) */
+	/* FIXME: this breaks with relative paths */
+	if ( (strncmp("/proc", pathname, 5) == 0) ||
+			(strncmp("/sys", pathname, 4) == 0) ) {
+		printd("orig (f)\n");
+		return (*c_open)(pathname, flags, mode);
+	}
+
+	rec_inc();
+	fs = malloc(sizeof(struct jfs));
+	if (fs == NULL) {
+		rec_dec();
+		return -1;
+	}
+	fd = jopen(fs, pathname, flags, mode, 0);
+	if (fd >= MAXFD) {
+		printd("too many open fds: %d\n", fd);
+		jclose(fs);
+		free(fs);
+		rec_dec();
+		return -1;
+	}
+	rec_dec();
+	if (fd < 0) {
+		free(fs);	/* never reaches fd_table: avoid leak */
+		printd("return %d\n", fd);
+		return fd;
+	}
+
+	fd_lock(fd);
+	fd_table[fd].fd = fd;
+	fd_table[fd].fs = fs;
+	fd_unlock(fd);
+	printd("return %d\n", fd);
+	return fd;
+}
+
+/* exact copy of open(), but call c_open64 instead of c_open */
+int open64(const char *pathname, int flags, ...)
+{
+	int r, fd;
+	struct jfs *fs;
+	mode_t mode;
+	struct stat st;
+	va_list l;
+
+	if (flags & O_CREAT) {
+		va_start(l, flags);
+		mode = va_arg(l, mode_t);
+		va_end(l);
+	} else {
+		/* set it to 0, it's ignored anyway */
+		mode = 0;
+	}
+
+	if (called) {
+		printd("orig (r)\n");
+		return (*c_open64)(pathname, flags, mode);
+	}
+	printd("libjio\n");
+
+	/* skip special files */
+	r = stat(pathname, &st);
+	if (r == 0 && ( S_ISDIR(st.st_mode) \
+			|| S_ISCHR(st.st_mode) \
+			|| S_ISFIFO(st.st_mode) ) ) {
+		printd("orig (s)\n");
+		return (*c_open64)(pathname, flags, mode);
+	}
+
+	/* skip /proc and /sys (not /dev, the problematic files are taken care
+	 * of with the stat test above) */
+	/* FIXME: this breaks with relative paths */
+	if ( (strncmp("/proc", pathname, 5) == 0) ||
+			(strncmp("/sys", pathname, 4) == 0) ) {
+		printd("orig (f)\n");
+		return (*c_open64)(pathname, flags, mode);
+	}
+
+	rec_inc();
+	fs = malloc(sizeof(struct jfs));
+	if (fs == NULL) {
+		rec_dec();
+		return -1;
+	}
+	fd = jopen(fs, pathname, flags, mode, 0);
+	if (fd >= MAXFD) {
+		printd("too many open fds: %d\n", fd);
+		jclose(fs);
+		free(fs);
+		rec_dec();
+		return -1;
+	}
+	rec_dec();
+	if (fd < 0) {
+		free(fs);	/* never reaches fd_table: avoid leak */
+		printd("return %d\n", fd);
+		return fd;
+	}
+
+	fd_lock(fd);
+	fd_table[fd].fd = fd;
+	fd_table[fd].fs = fs;
+	fd_unlock(fd);
+
+	printd("return %d\n", fd);
+	return fd;
+}
+
+
+/* close() replacement: jclose() for fds we track, libc's close() otherwise */
+int close(int fd)
+{
+	int r;
+	struct jfs *fs;
+
+	if (called) {
+		printd("orig\n");
+		return (*c_close)(fd);
+	}
+
+	if (!fd_lock(fd)) {
+		/* outside fd_table, so open() never stored it: not ours */
+		return (*c_close)(fd);
+	}
+	fs = fd_table[fd].fs;
+	if (fs == NULL) {
+		printd("NULL fs, fd %d\n", fd);
+		fd_unlock(fd);
+		return (*c_close)(fd);
+	}
+	printd("libjio\n");
+	rec_inc();
+	r = jclose(fs);
+	if (fd_table[fd].fs != NULL) {
+		free(fd_table[fd].fs);
+		fd_table[fd].fd = -1;
+		fd_table[fd].fs = NULL;
+	}
+	rec_dec();
+	fd_unlock(fd);
+
+	printd("return %d\n", r);
+	return r;
+}
+
+
+int unlink(const char *pathname)
+{
+	int r;
+
+	if (called) {
+		printd("orig\n");
+		return (*c_unlink)(pathname);
+	}
+
+	printd("libjio\n");
+
+	rec_inc();
+	jfsck_cleanup(pathname);
+	rec_dec();
+
+	r = (*c_unlink)(pathname);
+	printd("return %d\n", r);
+
+	return r;
+}
+
+
+/* the rest of the functions are automagically generated from the following
+ * macro (DEF: prototype args, which must name the fd "fd"; INVR: libc call
+ * args; INVM: libjio call args). The ugliest. I'm so proud. */
+
+#define mkwrapper(rtype, name, DEF, INVR, INVM)			\
+	rtype name DEF						\
+	{ 							\
+		rtype r;					\
+		struct jfs *fs;					\
+								\
+		if (called) {					\
+			printd("orig\n");			\
+			return (*c_##name) INVR;		\
+		}						\
+								\
+		if (!fd_lock(fd)) {				\
+			/* outside fd_table: not ours */	\
+			return (*c_##name) INVR;		\
+		}						\
+		fs = fd_table[fd].fs;				\
+		if (fs == NULL) {				\
+			printd("(): NULL fs, fd %d\n", fd); 	\
+			fd_unlock(fd);				\
+			return (*c_##name) INVR;		\
+		}						\
+		printd("libjio\n");				\
+		rec_inc();					\
+		r = j##name INVM;				\
+		rec_dec();					\
+		fd_unlock(fd);					\
+								\
+		printd("return %lld\n", (long long) r); 	\
+		return r;					\
+	}
+
+
+/* 32-bit versions */
+mkwrapper(ssize_t, read, (int fd, void *buf, size_t count),
+		(fd, buf, count), (fs, buf, count) );
+
+mkwrapper(ssize_t, pread, (int fd, void *buf, size_t count, off_t offset),
+		(fd, buf, count, offset), (fs, buf, count, offset) );
+
+mkwrapper(ssize_t, readv, (int fd, const struct iovec *vector, int count),
+		(fd, vector, count), (fs, vector, count) );
+
+mkwrapper(ssize_t, write, (int fd, const void *buf, size_t count),
+		(fd, buf, count), (fs, buf, count) );
+
+mkwrapper(ssize_t, pwrite,
+		(int fd, const void *buf, size_t count, off_t offset),
+		(fd, buf, count, offset), (fs, buf, count, offset) );
+
+mkwrapper(ssize_t, writev, (int fd, const struct iovec *vector, int count),
+		(fd, vector, count), (fs, vector, count) );
+
+mkwrapper(off_t, lseek, (int fd, off_t offset, int whence),
+		(fd, offset, whence), (fs, offset, whence) );
+
+
+/* libjio defines jtruncate and jsync, not jftruncate and jfsync, which breaks
+ * the macro; so we add a nice #define to unbreak it */
+#define jftruncate jtruncate
+mkwrapper(int, ftruncate, (int fd, off_t length),
+		(fd, length), (fs, length) );
+
+#define jfsync jsync
+mkwrapper(int, fsync, (int fd), (fd), (fs) );
+
+
+/* 64-bit versions */
+#define jpread64 jpread
+mkwrapper(ssize_t, pread64, (int fd, void *buf, size_t count, off64_t offset),
+		(fd, buf, count, offset), (fs, buf, count, offset) );
+
+#define jpwrite64 jpwrite
+mkwrapper(ssize_t, pwrite64,
+		(int fd, const void *buf, size_t count, off64_t offset),
+		(fd, buf, count, offset), (fs, buf, count, offset) );
+
+#define jlseek64 jlseek
+mkwrapper(off64_t, lseek64, (int fd, off64_t offset, int whence),
+		(fd, offset, whence), (fs, offset, whence) );
+
+#define jftruncate64 jtruncate
+mkwrapper(int, ftruncate64, (int fd, off64_t length),
+		(fd, length), (fs, length) );
+
unchanged:
--- cur/Makefile~preloader	2004-10-10 20:58:16.069224640 -0300
+++ cur-root/Makefile	2004-10-10 20:58:16.074223880 -0300
@@ -8,15 +8,15 @@ OBJS = checksum.o common.o trans.o check
 # rules
 default: all
 
-all: shared static jiofsck
+all: libjio.so libjio.a jiofsck
 
-shared: $(OBJS)
+libjio.so: $(OBJS)
 	$(CC) -shared $(OBJS) -o libjio.so
 
-static: $(OBJS)
+libjio.a: $(OBJS)
 	$(AR) cr libjio.a $(OBJS)
 
-jiofsck: jiofsck.o static
+jiofsck: jiofsck.o libjio.a
 	$(CC) jiofsck.o libjio.a -lpthread -o jiofsck
 
 install: all
@@ -33,21 +33,36 @@ install: all
 	@echo "Please run ldconfig to update your library cache"
 	@echo
 
+.c.o:
+	$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+
+
 python: all
 	cd bindings/python && python setup.py build
 
 python_install: python
 	cd bindings/python && python setup.py install
 
-.c.o:
-	$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
+
+# libraries must follow the source file, or --as-needed linkers drop them
+preload: all
+	install -d bindings/preload/build/
+	$(CC) $(INCLUDES) -Wall -O6 -shared -fPIC -D_XOPEN_SOURCE=500 \
+		-I. -L. bindings/preload/libjio_preload.c \
+		-ljio -ldl -lpthread \
+		-o bindings/preload/build/libjio_preload.so
+
+preload_install: preload
+	install -d $(PREFIX)/lib
+	install -m 0755 bindings/preload/build/libjio_preload.so $(PREFIX)/lib
 
 
 clean:
 	rm -f $(OBJS) libjio.a libjio.so jiofsck.o jiofsck
 	rm -f *.bb *.bbg *.da *.gcov gmon.out
 	rm -rf bindings/python/build/
+	rm -rf bindings/preload/build/
 
 
-.PHONY: default all shared static install clean
+.PHONY: default all install python python_install preload preload_install clean
 
unchanged:
--- cur-root/bindings/python/libjio.c	2004-10-10 20:59:03.305043704 -0300
+++ cur-root/bindings/python/libjio.c	2004-10-10 20:59:04.610845192 -0300
@@ -381,6 +381,8 @@
 		return NULL;
 
 	rv = jtrans_add(tp->ts, buf, len, offset);
+	if (rv == 0)
+		return PyErr_SetFromErrno(PyExc_IOError);
 
 	return PyLong_FromLong(rv);
 }
@@ -614,6 +616,7 @@
 	PyModule_AddIntConstant(m, "J_COMMITTED", J_COMMITTED);
 	PyModule_AddIntConstant(m, "J_ROLLBACKED", J_ROLLBACKED);
 	PyModule_AddIntConstant(m, "J_ROLLBACKING", J_ROLLBACKING);
+	PyModule_AddIntConstant(m, "J_RDONLY", J_RDONLY);
 	PyModule_AddIntConstant(m, "J_ESUCCESS", J_ESUCCESS);
 	PyModule_AddIntConstant(m, "J_ENOENT", J_ENOENT);
 	PyModule_AddIntConstant(m, "J_ENOJOURNAL", J_ENOJOURNAL);
only in patch2:
unchanged:
--- cur/Make.conf~version-0.21	2004-10-12 22:28:12.670630208 -0300
+++ cur-root/Make.conf	2004-10-12 22:28:17.267931312 -0300
@@ -1,5 +1,5 @@
 
-VERSION="0.20"
+VERSION="0.21"
 
 CC = gcc
 CFLAGS += -Wall -O6 \

