TODO-update.patch
  Remove TODO file

split_rw_locks.patch
  Split file locks into read and write locks.

fsync_dir.patch
  Call fsync() on the journal directory because it's needed on some systems to
  flush metadata.

write_real_data.patch
  Write real data in the transaction file

lingering_transactions.patch
  Implement lingering transactions.

mmap_lock_file.patch
  Use mmap for the lock file

checksum.patch
  Implement checksum for transaction files.

flush_printf.patch
  Flush printf in jiofsck.

doc_update.patch
  Update documentation to reflect changes.

rollback_on_failure.patch
  Try to rollback when a transaction fails to commit

check_fsync_in_jsync.patch
  Make jsync() check fsync() return value.

version-0.17.patch
  Version 0.17



 cur-root/Make.conf      |    2 
 cur-root/Makefile       |    2 
 cur-root/check.c        |   40 +++++++---
 cur-root/checksum.c     |   47 +++++++++++
 cur-root/common.c       |   22 +++--
 cur-root/common.h       |   20 +++++
 cur-root/doc/guide.lyx  |   22 +++++
 cur-root/doc/guide.txt  |   56 +++++++++-----
 cur-root/doc/libjio.3   |    3 
 cur-root/doc/libjio.lyx |   94 ++++++++++++++++++++++-
 cur-root/doc/libjio.txt |  101 ++++++++++++++++++++++---
 cur-root/jiofsck.c      |    5 +
 cur-root/libjio.h       |   16 +++-
 cur-root/trans.c        |  190 +++++++++++++++++++++++++++++++++++-------------
 cur-root/unix.c         |   16 ++--
 cur/doc/TODO            |    7 -
 16 files changed, 528 insertions(+), 115 deletions(-)


unchanged:
--- cur/doc/TODO
+++ /dev/null	2004-04-13 23:59:22.000000000 -0300
@@ -1,7 +0,0 @@
-
- * allow to store the journal somewhere else (or just leave it as-is, and
-	let the user do a simple symlink of the journal directory?)
- * make jfsck return a list of fixed transactions
- * more testing on j{read|write}v()
- * more samples and integration inside the build system
- * a better manpage
unchanged:
--- cur/common.c~split_rw_locks	2004-07-10 22:23:17.000000000 -0300
+++ cur-root/common.c	2004-07-10 22:23:17.000000000 -0300
@@ -24,17 +24,23 @@ off_t plockf(int fd, int cmd, off_t offs
 	struct flock fl;
 	int op;
 
-	if (cmd == F_LOCK) {
+	op = -1;
+	fl.l_type = -1;
+
+	if (cmd & _F_READ) {
+		fl.l_type = F_RDLCK;
+	} else if (cmd & _F_WRITE) {
 		fl.l_type = F_WRLCK;
+	}
+
+	if (cmd & _F_LOCK) {
 		op = F_SETLKW;
-	} else if (cmd == F_ULOCK) {
-		fl.l_type = F_UNLCK;
-		op = F_SETLKW;
-	} else if (cmd == F_TLOCK) {
-		fl.l_type = F_WRLCK;
+	} else if (cmd & _F_TLOCK) {
 		op = F_SETLK;
-	} else
-		return 0;
+	} else if (cmd & F_UNLOCK) {
+		fl.l_type = F_UNLCK;
+		op = F_SETLKW; /* not very relevant */
+	}
 
 	fl.l_whence = SEEK_SET;
 	fl.l_start = offset;
unchanged:
--- cur-root/common.h	2004-07-13 10:49:35.500426136 -0300
+++ cur-root/common.h	2004-07-13 18:16:46.037953808 -0300
@@ -10,10 +10,30 @@
 #define _COMMON_H
 
+#include <sys/types.h>	/* for ssize_t and off_t */
+#include <stdint.h>	/* for uint*_t */
+
+
+#define _F_READ		0x00001
+#define _F_WRITE	0x00010
+#define _F_LOCK		0x00100
+#define _F_TLOCK	0x01000
+#define _F_ULOCK	0x10000
+
+#define F_LOCKR		(_F_LOCK | _F_READ)
+#define F_LOCKW		(_F_LOCK | _F_WRITE)
+#define F_TLOCKR	(_F_TLOCK | _F_READ)
+#define F_TLOCKW	(_F_TLOCK | _F_WRITE)
+#define F_UNLOCK	(_F_ULOCK)
+
+
 off_t plockf(int fd, int cmd, off_t offset, off_t len);
 ssize_t spread(int fd, void *buf, size_t count, off_t offset);
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset);
 int get_jdir(const char *filename, char *jdir);
 int get_jtfile(const char *filename, int tid, char *jtfile);
 
+int checksum(int fd, size_t len, uint32_t *csum);
+uint32_t checksum_map(uint8_t *map, size_t count);
+
 #endif
 
unchanged:
--- cur-root/check.c	2004-07-13 18:16:44.799142136 -0300
+++ cur-root/check.c	2004-07-13 18:16:46.037953808 -0300
@@ -60,11 +60,11 @@
 		op->offset = *( (uint64_t *) p);
 		p += 8;
 
-		if (len < (p - map) + op->plen)
+		if (len < (p - map) + op->len)
 			goto error;
 
-		op->pdata = (void *) p;
-		p += op->plen;
+		op->buf = (void *) p;
+		p += op->len;
 
 		if (ts->op == NULL) {
 			ts->op = op;
@@ -93,14 +93,16 @@
 /* check the journal and rollback incomplete transactions */
 int jfsck(const char *name, struct jfsck_result *res)
 {
-	int fd, tfd, rv, i, maxtid;
+	int fd, tfd, rv, i;
+	unsigned int maxtid;
+	uint32_t csum1, csum2;
 	char jdir[PATH_MAX], jlockfile[PATH_MAX], tname[PATH_MAX];
 	struct stat sinfo;
 	struct jfs fs;
 	struct jtrans *curts;
 	DIR *dir;
 	struct dirent *dent;
-	void *map;
+	unsigned char *map;
 	off_t filelen;
 
 
@@ -117,6 +119,10 @@
 	if (rv < 0 || !S_ISDIR(sinfo.st_mode))
 		return J_ENOJOURNAL;
 
+	fs.jdirfd = open(jdir, O_RDONLY);
+	if (fs.jdirfd < 0)
+		return J_ENOJOURNAL;
+
 	/* open the lock file, which is only used to complete the jfs
 	 * structure */
 	snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
@@ -125,6 +131,11 @@
 		return J_ENOJOURNAL;
 	fs.jfd = rv;
 
+	fs.jmap = (int *) mmap(NULL, sizeof(unsigned int),
+			PROT_READ | PROT_WRITE, MAP_SHARED, fs.jfd, 0);
+	if (fs.jmap == MAP_FAILED)
+		return J_ENOJOURNAL;
+
 	dir = opendir(jdir);
 	if (dir == NULL)
 		return J_ENOJOURNAL;
@@ -174,7 +185,7 @@
 
 		/* try to lock the transaction file, if it's locked then it is
 		 * currently being used so we skip it */
-		rv = plockf(tfd, F_TLOCK, 0, 0);
+		rv = plockf(tfd, F_TLOCKW, 0, 0);
 		if (rv == -1) {
 			res->in_progress++;
 			goto loop;
@@ -182,13 +193,24 @@
 
 		filelen = lseek(tfd, 0, SEEK_END);
 		map = mmap(0, filelen, PROT_READ, MAP_SHARED, tfd, 0);
-		rv = fill_trans((unsigned char *) map, filelen, curts);
+		rv = fill_trans(map, filelen, curts);
 		if (rv != 1) {
 			res->broken++;
 			goto loop;
 		}
 
-		rv = jtrans_rollback(curts);
+		/* verify the checksum */
+		csum1 = checksum_map(map, filelen - (sizeof(uint32_t)));
+		csum2 = * (uint32_t *) (map + filelen - (sizeof(uint32_t)));
+		if (csum1 != csum2) {
+			res->corrupt++;
+			goto loop;
+		}
+
+		/* remove flags from the transaction */
+		curts->flags = 0;
+
+		rv = jtrans_commit(curts);
 
 		munmap(map, filelen);
 
@@ -196,7 +218,7 @@
 			res->apply_error++;
 			goto loop;
 		}
-		res->rollbacked++;
+		res->reapplied++;
 
 
 loop:
unchanged:
--- cur-root/trans.c	2004-07-14 23:22:45.546010384 -0300
+++ cur-root/trans.c	2004-07-14 23:22:51.418117688 -0300
@@ -17,6 +17,7 @@
 #include <stdio.h>
 #include <dirent.h>
 #include <errno.h>
+#include <sys/mman.h>
 
 #include "libjio.h"
 #include "common.h"
@@ -29,33 +30,24 @@
 /* gets a new transaction id */
 static unsigned int get_tid(struct jfs *fs)
 {
-	unsigned int curid;
-	int r, rv;
+	unsigned int curid, rv;
 
 	/* lock the whole file */
-	plockf(fs->jfd, F_LOCK, 0, 0);
+	plockf(fs->jfd, F_LOCKW, 0, 0);
 
 	/* read the current max. curid */
-	r = spread(fs->jfd, &curid, sizeof(curid), 0);
-	if (r != sizeof(curid)) {
-		rv = 0;
-		goto exit;
-	}
+	curid = *(fs->jmap);
 
 	/* increment it and handle overflows */
 	rv = curid + 1;
 	if (rv == 0)
-		rv = 1;
+		goto exit;
 
 	/* write to the file descriptor */
-	r = spwrite(fs->jfd, &rv, sizeof(rv), 0);
-	if (r != sizeof(curid)) {
-		rv = 0;
-		goto exit;
-	}
+	*(fs->jmap) = rv;
 
 exit:
-	plockf(fs->jfd, F_ULOCK, 0, 0);
+	plockf(fs->jfd, F_UNLOCK, 0, 0);
 	return rv;
 }
 
@@ -63,17 +55,13 @@
 static void free_tid(struct jfs *fs, unsigned int tid)
 {
 	unsigned int curid, i;
-	int r;
 	char name[PATH_MAX];
 
 	/* lock the whole file */
-	plockf(fs->jfd, F_LOCK, 0, 0);
+	plockf(fs->jfd, F_LOCKW, 0, 0);
 
 	/* read the current max. curid */
-	r = spread(fs->jfd, &curid, sizeof(curid), 0);
-	if (r != sizeof(curid)) {
-		goto exit;
-	}
+	curid = *(fs->jmap);
 
 	if (tid < curid) {
 		/* we're not freeing the max. curid, so we just return */
@@ -92,14 +80,11 @@
 		}
 
 		/* and save it */
-		r = spwrite(fs->jfd, &i, sizeof(i), 0);
-		if (r != sizeof(curid)) {
-			goto exit;
-		}
+		*(fs->jmap) = i;
 	}
 
 exit:
-	plockf(fs->jfd, F_ULOCK, 0, 0);
+	plockf(fs->jfd, F_UNLOCK, 0, 0);
 	return;
 }
 
@@ -194,9 +179,11 @@
 int jtrans_commit(struct jtrans *ts)
 {
 	int id, rv, fd = -1;
+	uint32_t csum;
 	char *name;
 	unsigned char *buf_init, *bufp;
 	struct joper *op;
+	struct jlinger *linger;
 	off_t curpos = 0;
 	size_t written = 0;
 
@@ -218,7 +205,7 @@
 		goto exit;
 
 	/* and lock it */
-	plockf(fd, F_LOCK, 0, 0);
+	plockf(fd, F_LOCKW, 0, 0);
 
 	ts->id = id;
 	ts->name = name;
@@ -255,7 +242,7 @@
 	 * break atomicity warantees if we need to rollback */
 	if (!(ts->flags & J_NOLOCK)) {
 		for (op = ts->op; op != NULL; op = op->next) {
-			rv = plockf(ts->fs->fd, F_LOCK, op->offset, op->len);
+			rv = plockf(ts->fs->fd, F_LOCKW, op->offset, op->len);
 			if (rv == -1)
 				/* note it can fail with EDEADLK */
 				goto exit;
@@ -265,10 +252,11 @@
 
 	/* save each transacion in the file */
 	for (op = ts->op; op != NULL; op = op->next) {
-		/* read the current content only if it's not there yet, which
-		 * is the normal case, but for rollbacking we fill it
+		/* read the current content only if the transaction is not
+		 * marked as NOROLLBACK, and if the data is not there yet,
+		 * which is the normal case, but for rollbacking we fill it
 		 * ourselves */
-		if (op->pdata == NULL) {
+		if (!(ts->flags & J_NOROLLBACK) && (op->pdata == NULL)) {
 			op->pdata = malloc(op->len);
 			if (op->pdata == NULL)
 				goto exit;
@@ -313,26 +301,48 @@
 		curpos += J_DISKOPHEADSIZE;
 
 		/* and save it to the disk */
-		rv = spwrite(fd, op->pdata, op->plen, curpos);
-		if (rv != op->plen)
+		rv = spwrite(fd, op->buf, op->len, curpos);
+		if (rv != op->len)
 			goto exit;
 
-		curpos += op->plen;
+		curpos += op->len;
 	}
 
+	/* compute and save the checksum */
+	if (!checksum(fd, curpos, &csum))
+		goto exit;
+
+	rv = spwrite(fd, &csum, sizeof(uint32_t), curpos);
+	if (rv != sizeof(uint32_t))
+		goto exit;
+	curpos += sizeof(uint32_t);
+
 	/* this is a simple but efficient optimization: instead of doing
 	 * everything O_SYNC, we sync at this point only, this way we avoid
 	 * doing a lot of very small writes; in case of a crash the
 	 * transaction file is only useful if it's complete (ie. after this
-	 * point) so we only flush here */
-	fsync(fd);
+	 * point) so we only flush here (both data and metadata) */
+	if (fsync(fd) != 0)
+		goto exit;
+	if (fsync(ts->fs->jdirfd) != 0) {
+		/* it seems to be legal that fsync() on directories is not
+		 * implemented, so if this fails with EINVAL or EBADF, just
+		 * call a global sync(); which is awful (and might still
+		 * return before metadata is done) but it seems to be the
+		 * saner choice; otherwise we just fail */
+		if (errno == EINVAL || errno == EBADF) {
+			sync();
+		} else {
+			goto exit;
+		}
+	}
 
 	/* now that we have a safe transaction file, let's apply it */
 	written = 0;
 	for (op = ts->op; op != NULL; op = op->next) {
 		rv = spwrite(ts->fs->fd, op->buf, op->len, op->offset);
 
-		plockf(ts->fs->fd, F_ULOCK, op->offset, op->len);
+		plockf(ts->fs->fd, F_UNLOCK, op->offset, op->len);
 		op->locked = 0;
 
 		if (rv != op->len)
@@ -341,20 +351,56 @@
 		written += rv;
 	}
 
-	/* the transaction has been applied, so we cleanup and remove it from
-	 * the disk */
-	free_tid(ts->fs, ts->id);
-	unlink(name);
+	if (ts->flags & J_LINGER) {
+		linger = malloc(sizeof(struct jlinger));
+		if (linger == NULL)
+			goto exit;
+
+		linger->id = id;
+		linger->name = strdup(name);
+		linger->next = ts->fs->ltrans;
+
+		ts->fs->ltrans = linger;
+	} else {
+		/* the transaction has been applied, so we cleanup and remove
+		 * it from the disk */
+		unlink(name);
+		free_tid(ts->fs, ts->id);
+	}
 
 	/* mark the transaction as commited, _after_ it was removed */
 	ts->flags = ts->flags | J_COMMITED;
 
 
 exit:
+	/* If the transaction failed we try to recover by rollbacking it
+	 * NOTE: on extreme conditions (ENOSPC/disk failure) this can fail
+	 * too! There's nothing much we can do in that case, the caller should
+	 * take care of it by itself.
+	 * The transaction file might be OK at this point, so the data could
+	 * be recovered by a posterior jfsck(); however, that's not what the
+	 * user expects (after all, if we return failure, new data should
+	 * never appear), so we remove the transaction file.
+	 * Transactions that were successfuly recovered by rollbacking them
+	 * will have J_ROLLBACKED in their flags, so the caller can verify if
+	 * the failure was recovered or not. */
+	if (!(ts->flags & J_COMMITED)) {
+		unlink(name);
+		free_tid(ts->fs, ts->id);
+
+		rv = ts->flags;
+		ts->flags = ts->flags | J_NOLOCK;
+		if (jtrans_rollback(ts) >= 0) {
+			ts->flags = rv | J_ROLLBACKED;
+		} else {
+			ts->flags = rv;
+		}
+	}
+
 	close(fd);
 	for (op = ts->op; op != NULL; op = op->next) {
 		if (op->locked)
-			plockf(ts->fs->fd, F_ULOCK, op->offset, op->len);
+			plockf(ts->fs->fd, F_UNLOCK, op->offset, op->len);
 	}
 
 	pthread_mutex_unlock(&(ts->lock));
@@ -375,8 +421,9 @@
 
 	/* FIXME: this looks like a mess! */
 
-	if (ts->op == NULL) {
-		/* we're trying to rollback an empty transaction */
+	if (ts->op == NULL || ts->flags & J_NOROLLBACK) {
+		/* we're either trying to rollback an empty transaction, or
+		 * one marked without rollbacking support */
 		return 0;
 	}
 
@@ -451,6 +498,7 @@
 	fs->fd = fd;
 	fs->name = strdup(name);
 	fs->flags = jflags;
+	fs->ltrans = NULL;
 
 	/* Note on fs->lock usage: this lock is used only inside the wrappers,
 	 * and exclusively to protect the file pointer. This means that it
@@ -473,6 +521,12 @@
 	if (rv < 0 || !S_ISDIR(sinfo.st_mode))
 		return -1;
 
+	/* open the directory, we will use it to flush transaction files'
+	 * metadata in jtrans_commit() */
+	fs->jdirfd = open(jdir, O_RDONLY);
+	if (fs->jdirfd < 0)
+		return -1;
+
 	snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
 	jfd = open(jlockfile, O_RDWR | O_CREAT, 0600);
 	if (jfd < 0)
@@ -481,26 +535,62 @@
 	/* initialize the lock file by writing the first tid to it, but only
 	 * if its empty, otherwise there is a race if two processes call
 	 * jopen() simultaneously and both initialize the file */
-	plockf(jfd, F_LOCK, 0, 0);
+	plockf(jfd, F_LOCKW, 0, 0);
 	lstat(jlockfile, &sinfo);
-	if (sinfo.st_size == 0) {
+	if (sinfo.st_size != sizeof(unsigned int)) {
 		t = 1;
-		rv = write(jfd, &t, sizeof(t));
+		rv = spwrite(jfd, &t, sizeof(t), 0);
 		if (rv != sizeof(t)) {
-			plockf(jfd, F_ULOCK, 0, 0);
+			plockf(jfd, F_UNLOCK, 0, 0);
 			return -1;
 		}
 	}
-	plockf(jfd, F_ULOCK, 0, 0);
+	plockf(jfd, F_UNLOCK, 0, 0);
 
 	fs->jfd = jfd;
 
+	fs->jmap = (int *) mmap(NULL, sizeof(unsigned int),
+			PROT_READ | PROT_WRITE, MAP_SHARED, jfd, 0);
+	if (fs->jmap == MAP_FAILED)
+		return -1;
+
 	return fd;
 }
 
+/* sync a file (makes sense only if using lingering transactions): flush the
+ * file with fsync() and, if that worked, drop the lingering transactions.
+ * Returns fsync()'s result: 0 on success, non-zero on failure. */
+int jsync(struct jfs *fs)
+{
+	int rv;
+	struct jlinger *linger;
+
+	pthread_mutex_lock(&(fs->lock));
+
+	rv = fsync(fs->fd);
+	if (rv != 0)
+		goto exit;
+
+	/* detach each entry before freeing it, so fs->ltrans ends up NULL
+	 * instead of pointing at freed memory on the next jsync()/jclose() */
+	while ((linger = fs->ltrans) != NULL) {
+		fs->ltrans = linger->next;
+		unlink(linger->name);	/* same order as jtrans_commit() */
+		free_tid(fs, linger->id);
+		free(linger->name);
+		free(linger);
+	}
+
+exit:
+	pthread_mutex_unlock(&(fs->lock));
+	return rv;
+}
+
 /* close a file */
 int jclose(struct jfs *fs)
 {
+	if (jsync(fs))
+		return -1;
 	if (close(fs->fd))
 		return -1;
 	if (close(fs->jfd))
@@ -508,6 +598,8 @@
 	if (fs->name)
 		/* allocated by strdup() in jopen() */
 		free(fs->name);
+	munmap(fs->jmap, sizeof(unsigned int));
+
 	return 0;
 }
 
unchanged:
--- cur/unix.c~split_rw_locks	2004-07-13 10:50:47.057547800 -0300
+++ cur-root/unix.c	2004-07-13 10:49:11.334099976 -0300
@@ -27,9 +27,9 @@ ssize_t jread(struct jfs *fs, void *buf,
 
 	pos = lseek(fs->fd, 0, SEEK_CUR);
 
-	plockf(fs->fd, F_LOCK, pos, count);
+	plockf(fs->fd, F_LOCKR, pos, count);
 	rv = spread(fs->fd, buf, count, pos);
-	plockf(fs->fd, F_ULOCK, pos, count);
+	plockf(fs->fd, F_UNLOCK, pos, count);
 
 	if (rv == count) {
 		/* if success, advance the file pointer */
@@ -46,9 +46,9 @@ ssize_t jpread(struct jfs *fs, void *buf
 {
 	int rv;
 
-	plockf(fs->fd, F_LOCK, offset, count);
+	plockf(fs->fd, F_LOCKR, offset, count);
 	rv = spread(fs->fd, buf, count, offset);
-	plockf(fs->fd, F_ULOCK, offset, count);
+	plockf(fs->fd, F_UNLOCK, offset, count);
 
 	return rv;
 }
@@ -66,9 +66,9 @@ ssize_t jreadv(struct jfs *fs, struct io
 
 	pthread_mutex_lock(&(fs->lock));
 	pos = lseek(fs->fd, 0, SEEK_CUR);
-	plockf(fs->fd, F_LOCK, pos, count);
+	plockf(fs->fd, F_LOCKR, pos, count);
 	rv = readv(fs->fd, vector, count);
-	plockf(fs->fd, F_ULOCK, pos, count);
+	plockf(fs->fd, F_UNLOCK, pos, count);
 	pthread_mutex_unlock(&(fs->lock));
 
 	return rv;
@@ -162,9 +162,9 @@ int jtruncate(struct jfs *fs, off_t leng
 	int rv;
 
 	/* lock from length to the end of file */
-	plockf(fs->fd, F_LOCK, length, 0);
+	plockf(fs->fd, F_LOCKW, length, 0);
 	rv = ftruncate(fs->fd, length);
-	plockf(fs->fd, F_ULOCK, length, 0);
+	plockf(fs->fd, F_UNLOCK, length, 0);
 
 	return rv;
 }
unchanged:
--- cur-root/libjio.h	2004-07-13 18:16:46.038953656 -0300
+++ cur-root/libjio.h	2004-07-14 23:22:51.418117688 -0300
@@ -24,8 +24,11 @@
 struct jfs {
 	int fd;			/* main file descriptor */
 	char *name;		/* and its name */
+	int jdirfd;		/* journal directory file descriptor */
 	int jfd;		/* journal's lock file descriptor */
+	int *jmap;		/* journal's lock file mmap area */
 	int flags;		/* journal flags */
+	struct jlinger *ltrans;	/* lingered transactions */
 	pthread_mutex_t lock;	/* a soft lock used in some operations */
 };
 
@@ -52,13 +55,21 @@
 	struct joper *op;	/* list of operations */
 };
 
+/* lingered transaction: node of the jfs->ltrans list, freed by jsync() */
+struct jlinger {
+	int id;			/* transaction id */
+	char *name;		/* transaction file name (strdup()ed copy) */
+	struct jlinger *next;	/* next lingered transaction, NULL at the end */
+};
+
 struct jfsck_result {
 	int total;		/* total transactions files we looked at */
 	int invalid;		/* invalid files in the journal directory */
 	int in_progress;	/* transactions in progress */
 	int broken;		/* transactions broken */
+	int corrupt;		/* corrupt transactions */
 	int apply_error;	/* errors applying the transaction */
-	int rollbacked;		/* transactions that were rollbacked */
+	int reapplied;		/* transactions that were reapplied */
 };
 
 
@@ -87,6 +98,7 @@
 int jtrans_commit(struct jtrans *ts);
 int jtrans_rollback(struct jtrans *ts);
 void jtrans_free(struct jtrans *ts);
+int jsync(struct jfs *fs);
 int jclose(struct jfs *fs);
 
 
@@ -121,6 +133,8 @@
 
 /* jfs constants */
 #define J_NOLOCK	1	/* don't lock the file before operating on it */
+#define J_NOROLLBACK	2	/* no need to read rollback information */
+#define J_LINGER	4	/* use lingering transactions (own bit: 3 was 1|2) */
 
 /* jtrans constants */
 #define J_COMMITED	1	/* mark a transaction as commited */
unchanged:
--- cur-root/jiofsck.c	2004-07-13 18:16:46.038953656 -0300
+++ cur-root/jiofsck.c	2004-07-13 18:16:47.154784024 -0300
@@ -45,6 +45,7 @@
 	memset(&res, 0, sizeof(res));
 
 	printf("Checking journal: ");
+	fflush(stdout);
 	rv = jfsck(file, &res);
 
 	if (rv == J_ENOENT) {
@@ -60,6 +61,7 @@
 
 	if (do_cleanup) {
 		printf("Cleaning journal: ");
+		fflush(stdout);
 		if (!jfsck_cleanup(file)) {
 			printf("Error cleaning journal\n");
 			return 1;
@@ -75,8 +77,9 @@
 	printf("Invalid:\t %d\n", res.invalid);
 	printf("In progress:\t %d\n", res.in_progress);
 	printf("Broken:\t\t %d\n", res.broken);
+	printf("Corrupt:\t %d\n", res.corrupt);
 	printf("Apply error:\t %d\n", res.apply_error);
-	printf("Rollbacked:\t %d\n", res.rollbacked);
+	printf("Reapplied:\t %d\n", res.reapplied);
 	printf("\n");
 
 	if (!do_cleanup) {
unchanged:
--- /dev/null	2004-04-13 23:59:22.000000000 -0300
+++ cur-root/checksum.c	2004-07-13 18:16:46.036953960 -0300
@@ -0,0 +1,47 @@
+
+/*
+ * libjio - A library for Journaled I/O
+ * Alberto Bertogli (albertogli@telpin.com.ar)
+ *
+ * Checksum functions
+ * Based on RFC 1071, "Computing the Internet Checksum"
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include "common.h"
+
+
+int checksum(int fd, size_t len, uint32_t *csum)
+{
+	/* map the first len bytes of fd read-only; 0 means failure */
+	uint8_t *area = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+	if (area == MAP_FAILED)
+		return 0;
+
+	/* checksum the mapping, then release it */
+	*csum = checksum_map(area, len);
+	munmap(area, len);
+
+	return 1;
+}
+
+uint32_t checksum_map(uint8_t *map, size_t count)
+{
+	uint32_t sum = 0;
+
+	/* sum 16-bit words; step two bytes so each byte is added once */
+	for (; count > 1; count -= 2, map += 2)
+		sum += * (uint16_t *) map;
+	/* odd length: add the trailing byte */
+	if( count > 0 )
+		sum += * (uint8_t *) map;
+
+	/* fold the carries into the low 16 bits (RFC 1071) */
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
unchanged:
--- cur/Makefile~checksum	2004-07-13 18:16:46.031954720 -0300
+++ cur-root/Makefile	2004-07-13 18:16:46.037953808 -0300
@@ -3,7 +3,7 @@ include Make.conf
 
 
 # objects to build
-OBJS = common.o trans.o check.o unix.o ansi.o
+OBJS = checksum.o common.o trans.o check.o unix.o ansi.o
 
 # rules
 default: all
unchanged:
--- cur/doc/libjio.lyx~doc_update	2004-07-13 18:16:48.000000000 -0300
+++ cur-root/doc/libjio.lyx	2004-07-14 23:14:27.947656832 -0300
@@ -129,7 +129,8 @@ This warnings are no different from a no
 The transaction file
 \layout Standard
 
-The transaction file is composed of two main parts: the header and the payload.
+The transaction file is composed of three main parts: the header, the payload
+ and the checksum.
 \layout Standard
 
 The header holds basic information about the transaction itself, including
@@ -138,7 +139,9 @@ The header holds basic information about
  two parts: the first one includes static information about the operation
  (the lenght of the data, the offset of the file where it should be applied,
  etc.) and the data itself, which is saved by the library prior applying
- the commit, so transactions can be rollbacked.
+ the commit, so transactions can be reapplied if necessary.
+ The last part is just a 32 bit integer with the checksum of all the previous
+ data, used for integrity verification during the recovery process.
 \layout Section
 
 The commit procedure
@@ -201,7 +204,7 @@ Write the header
 Read all the previous data from the file
 \layout Itemize
 
-Write the previous data in the transaction
+Write the data in the transaction
 \layout Itemize
 
 Write the data to the file
@@ -325,6 +328,91 @@ In any case, after making the recovery y
  transaction atomicity was preserved.
 \layout Section
 
+Advanced flags
+\layout Standard
+
+The library allows to set flags to transactions in order to support special
+ features and behaviour changes that might be useful in special cases.
+ In this section, we describe the most relevant ones.
+\layout Subsection
+
+Avoid rollbacking
+\layout Standard
+
+If you are completely sure that you will never need to rollback a transaction,
+ there is one flag, 
+\emph on 
+J_NOROLLBACK
+\emph default 
+, that will tell the library to avoid reading the rollback information from
+ the file when applying a transaction.
+ It can be useful when transactions are very very big, or there are several
+ memory constraints, or reading is really synchronous.
+ It is also very very dangerous because if for some reason the transaction
+ fails to apply you will not be able to recover it.
+\layout Subsection
+
+Skip locking
+\layout Standard
+
+In some cases, you might not want the library to lock the file itself, because
+ you need to do it yourself.
+ For these cases, the flag J_NOLOCK makes the commit procedure skip locking
+ regions.
+ You need to be quite careful with this flag because if you don't take good
+ care of locking, it will lead to corruption.
+\layout Subsection
+
+Lingering transactions
+\layout Standard
+
+We call lingering transactions to a small but interesting variant of the
+ regular transactions described throughout this text.
+\layout Standard
+
+If we go back at the commit procedure, we will see that first we save all
+ the data to the transaction file, then write the file, and finally remove
+ the transaction file, so data gets written twice synchronously.
+\layout Standard
+
+The problem with this approach is performance: it's quite slow because all
+ the writes and seeks involved.
+ Besides, it makes no use of the OS write caching capabilities, and it optimizes
+ for the uncommon case of a crash.
+\layout Standard
+
+Lingering transactions is a special way of dealing with the transactions
+ we have already seen.
+ After writing the transaction file and making sure it has hit the media,
+ the data is already safe.
+ So then we write to the real file, but this time 
+\emph on 
+asynchronously
+\emph default 
+, and let the OS perform the write caching and defer the real operation
+ to the media.
+ Then, instead of removing the transaction file, we leave it.
+ At this point, we know the transaction file is safe, but as the real file
+ has not been synchronized yet, the data state is still uncertain; however,
+ if we crash, there will be enough data to recover.
+\layout Standard
+
+Usually, OS do write caching and delay the proper write to the media and
+ perform it when the time is right or when it's forced by a fsync(), so
+ the performance goes up a lot.
+\layout Standard
+
+In this mode, you should call jsync() frequently, which calls fsync() on
+ the file making sure the data is safe, and after that removes all the lingering
+ transactions.
+\layout Standard
+
+The downside of lingering transactions are the additional space needed to
+ hold them, and the fact that if you crash there will be more transactions
+ to reapply, and might take longer.
+ But if you jsync() often, that shouldn't be noticeable.
+\layout Section
+
 UNIX API
 \layout Standard
 
unchanged:
--- cur/doc/libjio.txt~doc_update	2004-07-13 18:16:48.000000000 -0300
+++ cur-root/doc/libjio.txt	2004-07-14 23:14:58.365032688 -0300
@@ -10,9 +10,13 @@ Table of Contents
 3 The commit procedure
 4 The rollback procedure
 5 The recovery procedure
-6 UNIX API
-7 ACID (or How does libjio fit into theory)
-8 Working from outside
+6 Advanced flags
+    6.1 Avoid rollbacking
+    6.2 Skip locking
+    6.3 Lingering transactions
+7 UNIX API
+8 ACID (or How does libjio fit into theory)
+9 Working from outside
 
 
 
@@ -86,8 +90,8 @@ working on them. 
 
 2.1 The transaction file
 
-The transaction file is composed of two main parts: the 
-header and the payload.
+The transaction file is composed of three main parts: 
+the header, the payload and the checksum.
 
 The header holds basic information about the 
 transaction itself, including the ID, some flags, and 
@@ -97,7 +101,10 @@ two parts: the first one includes static
 about the operation (the lenght of the data, the offset 
 of the file where it should be applied, etc.) and the 
 data itself, which is saved by the library prior 
-applying the commit, so transactions can be rollbacked.
+applying the commit, so transactions can be reapplied 
+if necessary. The last part is just a 32 bit integer 
+with the checksum of all the previous data, used for 
+integrity verification during the recovery process.
 
 3 The commit procedure
 
@@ -135,7 +142,7 @@ way, inside jtrans_commit():
 
 * Read all the previous data from the file
 
-* Write the previous data in the transaction
+* Write the data in the transaction
 
 * Write the data to the file
 
@@ -214,7 +221,81 @@ remove the journal entirely and let the 
 a new one, and you can be sure that transaction 
 atomicity was preserved.
 
-6 UNIX API
+6 Advanced flags
+
+The library allows to set flags to transactions in 
+order to support special features and behaviour changes 
+that might be useful in special cases. In this section, 
+we describe the most relevant ones.
+
+6.1 Avoid rollbacking
+
+If you are completely sure that you will never need to 
+rollback a transaction, there is one flag, 
+J_NOROLLBACK, that will tell the library to avoid 
+reading the rollback information from the file when 
+applying a transaction. It can be useful when 
+transactions are very very big, or there are several 
+memory constraints, or reading is really synchronous. 
+It is also very very dangerous because if for some 
+reason the transaction fails to apply you will not be 
+able to recover it.
+
+6.2 Skip locking
+
+In some cases, you might not want the library to lock 
+the file itself, because you need to do it yourself. 
+For these cases, the flag J_NOLOCK makes the commit 
+procedure skip locking regions. You need to be quite 
+careful with this flag because if you don't take good 
+care of locking, it will lead to corruption.
+
+6.3 Lingering transactions
+
+We call lingering transactions to a small but 
+interesting variant of the regular transactions 
+described throughout this text.
+
+If we go back at the commit procedure, we will see that 
+first we save all the data to the transaction file, 
+then write the file, and finally remove the transaction 
+file, so data gets written twice synchronously.
+
+The problem with this approach is performance: it's 
+quite slow because all the writes and seeks involved. 
+Besides, it makes no use of the OS write caching 
+capabilities, and it optimizes for the uncommon case of 
+a crash.
+
+Lingering transactions is a special way of dealing with 
+the transactions we have already seen. After writing 
+the transaction file and making sure it has hit the 
+media, the data is already safe. So then we write to 
+the real file, but this time asynchronously, and let 
+the OS perform the write caching and defer the real 
+operation to the media. Then, instead of removing the 
+transaction file, we leave it. At this point, we know 
+the transaction file is safe, but as the real file has 
+not been synchronized yet, the data state is still 
+uncertain; however, if we crash, there will be enough 
+data to recover.
+
+Usually, OS do write caching and delay the proper write 
+to the media and perform it when the time is right or 
+when it's forced by a fsync(), so the performance goes 
+up a lot.
+
+In this mode, you should call jsync() frequently, which 
+calls fsync() on the file making sure the data is safe, 
+and after that removes all the lingering transactions.
+
+The downside of lingering transactions are the 
+additional space needed to hold them, and the fact that 
+if you crash there will be more transactions to 
+reapply, and might take longer. But if you jsync() 
+often, that shouldn't be noticeable.
+
+7 UNIX API
 
 We call UNIX API to the functions provided by the 
 library that emulate the good old UNIX file 
@@ -235,7 +316,7 @@ list them here for completion:
 
 * jclose()
 
-7 ACID (or How does libjio fit into theory)
+8 ACID (or How does libjio fit into theory)
 
 I haven't read much theory about this, and the library 
 was implemented basically by common sense and not 
@@ -282,7 +363,7 @@ Let's take a look one by one:
   syncronous I/O, data is safely written and can be 
   recovered after a crash.
 
-8 Working from outside
+9 Working from outside
 
 If you want, and are careful enough, you can safely do 
 I/O without using the library. Here I'll give you some 
unchanged:
--- cur/doc/guide.lyx~doc_update	2004-07-13 18:16:48.000000000 -0300
+++ cur-root/doc/guide.lyx	2004-07-14 23:06:47.499655568 -0300
@@ -480,6 +480,28 @@ You can also do this manually with an ut
 jiofsck
 \emph default 
 , which can be used from the shell to perform the checking and cleanup.
+\layout Subsection
+
+Lingering transactions
+\layout Standard
+
+If you need to increase performance, you can use lingering transactions.
+ In this mode, transactions take up more disk space but allow you to do
+ the synchronous write only once, making commits much faster.
+ To use them, just add 
+\family typewriter 
+J_LINGER
+\family default 
+ to the jflags parameter in 
+\family typewriter 
+jopen()
+\family default 
+.
+ It is very wise to call 
+\family typewriter 
+jsync()
+\family default 
+ frequently to avoid using up too much space.
 \layout Section
 
 Disk layout
unchanged:
--- cur/doc/guide.txt~doc_update	2004-07-13 18:16:48.000000000 -0300
+++ cur-root/doc/guide.txt	2004-07-14 23:15:05.762908040 -0300
@@ -12,6 +12,7 @@ Table of Contents
     5.1 Interaction with reads
     5.2 Rollback
     5.3 Integrity checking and recovery
+    5.4 Lingering transactions
 6 Disk layout
 7 Other APIs
     7.1 UNIX API
@@ -56,12 +57,12 @@ represent an ordered group of operations
 The act of committing a transaction means writing all 
 the elements of the list; and rollbacking means to undo 
 a previous commit, and leave the data just as it was 
-before doing the commit (while all this definitions may
-seem obvious to some people, it requires special attention
-because there are a lot of different definitions, and it's
-not that common to see "transaction" applied to file I/O
-(it's a term used mostly on database stuff), so it's
-important to clarify before continuing).
+before doing the commit. While all these definitions may seem obvious to some 
+people, they require special attention because there are 
+a lot of different definitions, and it's not that 
+common to see "transaction" applied to file I/O (it's a 
+term used mostly on database stuff), so it's important 
+to clarify before continuing.
 
 It's important to note that the library not only 
 provides a convenient and easy API to perform this kind 
@@ -141,15 +142,27 @@ Let's put it all together and code a nic
 program (return values are ignored for simplicity):
 
 char buf[] = "Hello world!";
+
 struct jfs file;
+
 struct jtrans trans;
 
+
+
 jopen(&file, "filename", O_RDWR | O_CREAT, 0600, 0);
+
 jtrans_init(&file, &trans);
+
+
+
 jtrans_add(&trans, buf, strlen(buf), 0);
 
+
+
 jtrans_commit(&trans);
 
+
+
 jclose(&file);
 
 As we've seen, we open the file and initialize the 
@@ -161,7 +174,7 @@ with jtrans_commit(), and finally close 
 
 5 Advanced functions
 
-5.1 Interaction with reads
+5.1 Interaction with reads
 
 So far we've seen how to use the library to perform 
 writes, but what about reads? The only and main issue 
@@ -176,11 +189,10 @@ This set of functions are very similar t
 (read(), readv(), etc.); and in fact are named after 
 them: they're called jread(), jreadv() and jpread(); 
 and have the same parameters except for the first one, 
-which instead of a file descriptor is a file structure (in
-fact, this set of functions is a part of what is called
-the "UNIX API", which is described below). Bear in mind
-that transactions are only visible by reads after you
-commit them with jtrans_commit().
+which instead of a file descriptor is a file structure (in fact, this set of functions is a part of what is 
+called the "UNIX API", which is described below).
+Bear in mind that transactions are only visible to 
+reads after you commit them with jtrans_commit().
 
 5.2 Rollback
 
@@ -222,6 +234,16 @@ You can also do this manually with an ut
 jiofsck, which can be used from the shell to perform 
 the checking and cleanup.
 
+5.4 Lingering transactions
+
+If you need to increase performance, you can use 
+lingering transactions. In this mode, transactions take 
+up more disk space but allow you to do the synchronous 
+write only once, making commits much faster. To use 
+them, just add J_LINGER to the jflags parameter in 
+jopen(). It is very wise to call jsync() frequently to 
+avoid using up too much space.
+
 6 Disk layout
 
 The library creates a single directory for each file 
@@ -264,11 +286,11 @@ the manual page to see the details, but 
 like their UNIX version, only that they preserve 
 atomicity and thread-safety within each call.
 
-In particular, the group of functions related to reading
-(which was described above in "Interaction with reads")
-are extremely useful because they take care of the locking
-needed for the library proper behaviour. You should use
-them instead of the regular calls.
+In particular, the group of functions related to 
+reading (which was described above in "Interaction with reads") are extremely 
+useful because they take care of the locking needed for 
+the library proper behaviour. You should use them 
+instead of the regular calls.
 
 The full function list is available on the man page and 
 I won't reproduce it here; however the naming is quite 
unchanged:
--- cur/doc/libjio.3~doc_update	2004-07-14 23:19:42.570826840 -0300
+++ cur-root/doc/libjio.3	2004-07-14 23:20:49.426663208 -0300
@@ -22,6 +22,8 @@ libjio - A library for Journaled I/O
 
 .BI "int jtruncate(struct jfs *" fs ", off_t " lenght " );
 
+.BI "int jsync(struct jfs *" fs " );
+
 .BI "int jclose(struct jfs *" fs " );
 
 .BI "void jtrans_init(struct jfs *" fs " ,struct jtrans *" ts " );
@@ -190,4 +192,5 @@ albertogli@telpin.com.ar.
 .BR pread (2),
 .BR pwrite (2),
 .BR ftruncate (2),
+.BR fsync (2),
 .BR close (2)
only in patch2:
unchanged:
--- cur/Make.conf~version-0.17	2004-07-14 23:24:43.812031216 -0300
+++ cur-root/Make.conf	2004-07-14 23:24:49.658142472 -0300
@@ -1,5 +1,5 @@
 
-VERSION="0.15"
+VERSION="0.17"
 
 CC = gcc
 CFLAGS += -Wall -O6 \

