 .gitignore                                         |    5 +
 INSTALL                                            |    3 +-
 UPGRADING                                          |   12 +
 bindings/preload/Makefile                          |    5 +-
 bindings/python/libjio.c                           |  285 +++++++++++-
 bindings/python/setup.py                           |    2 +-
 doc/guide.rst                                      |   70 ++--
 doc/libjio.rst                                     |   20 +-
 libjio/Makefile                                    |   73 +++-
 libjio/autosync.c                                  |    2 +
 libjio/check.c                                     |  220 ++++------
 libjio/checksum.c                                  |  105 +++--
 libjio/common.c                                    |  118 +++++-
 libjio/common.h                                    |    7 +-
 libjio/compat.c                                    |   52 +--
 libjio/compat.h                                    |   24 +-
 libjio/doxygen/{Doxyfile.base => Doxyfile.base.in} |    2 +-
 libjio/doxygen/Makefile                            |   15 +-
 libjio/jiofsck.c                                   |   35 +-
 libjio/journal.c                                   |  467 +++++++++++++++-----
 libjio/journal.h                                   |   16 +-
 libjio/libjio.3                                    |   67 ++--
 libjio/libjio.h                                    |   78 +++-
 libjio/{libjio.skel.pc => libjio.pc.in}            |    2 +-
 libjio/trans.c                                     |  473 +++++++++++++-------
 libjio/trans.h                                     |   82 ++--
 libjio/unix.c                                      |   58 ++--
 samples/full.c                                     |    6 +-
 samples/jio3.c                                     |   10 +-
 tests/behaviour/runtests                           |    6 +
 tests/behaviour/t_corruption.py                    |   92 ++++-
 tests/behaviour/t_fi.py                            |   30 +-
 tests/behaviour/t_normal.py                        |  249 ++++++++++-
 tests/behaviour/tf.py                              |   69 ++--
 tests/stress/jiostress                             |  107 ++++-
 35 files changed, 2094 insertions(+), 773 deletions(-)

diff --git a/.gitignore b/.gitignore
index c009b08..5520f01 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 *.o
 *.a
 *.so
+*.so.*
+*.gcda
+*.gcno
 jiofsck
 libjio.pc
 samples/full
@@ -13,4 +16,6 @@ tests/performance/random
 *.pyo
 libjio/doxygen/doc.internal
 libjio/doxygen/doc.public
+libjio/doxygen/Doxyfile.base
+libjio/build-flags
 
diff --git a/INSTALL b/INSTALL
index ff96287..13e479c 100644
--- a/INSTALL
+++ b/INSTALL
@@ -14,7 +14,8 @@ After installing, you need to run "ldconfig" in order to update your dynamic
 library cache.
 
 If the default "make" is not GNU make (like in BSD systems), use "gmake"
-instead.
+instead. If the default "install" is not GNU/BSD compatible (like in Solaris
+systems), use "gmake INSTALL=ginstall".
 
 
 Special builds
diff --git a/UPGRADING b/UPGRADING
index dc7ded7..0db68b4 100644
--- a/UPGRADING
+++ b/UPGRADING
@@ -6,6 +6,18 @@ You should always clean all your files before upgrading. While I don't expect
 the transaction on-disk format to change, it's a good practise and it doesn't
 take much effort. When it's mandatory, it will be noted.
 
+-> 0.90 (On-disk format change, pre 1.0 freeze)
+  - The way transactions are stored on disk has changed. It is mandatory that
+    you jfsck all your files before upgrading. Hopefully this will be the last
+    backwards-incompatible format change before 1.0.
+  - jtrans_new() now takes an additional flags parameter.
+  - jopen() jflags parameter is now unsigned.
+  - J_NOCLEANUP was removed in favour of J_CLEANUP, and the default behaviour
+    of jfsck() is now not to clean up unless J_CLEANUP is passed in the flags.
+  - jtrans_add() renamed to jtrans_add_w().
+  - jtrans_commit() returns 0 on success, instead of the amount of bytes
+    written.
+
 -> 0.50 (Big API change)
   - Structures are now opaque types:
     struct jfs -> jfs_t; jopen() returns a pointer to one, jclose() frees it.
diff --git a/bindings/preload/Makefile b/bindings/preload/Makefile
index bfe34b7..318648f 100644
--- a/bindings/preload/Makefile
+++ b/bindings/preload/Makefile
@@ -9,6 +9,7 @@ ALL_CFLAGS = $(CFLAGS) $(MANDATORY_CFLAGS) -fPIC
 
 
 PREFIX = /usr/local
+INSTALL = install
 
 ifneq ($(V), 1)
         NICE_CC = @echo "  CC  $@"; $(CC)
@@ -29,8 +30,8 @@ preload: libjio_preload.o
 	$(NICE_CC) $(ALL_CFLAGS) -c $< -o $@
 
 install: preload
-	install -d $(PREFIX)/lib
-	install -m 0755 libjio_preload.so $(PREFIX)/lib
+	$(INSTALL) -d $(PREFIX)/lib
+	$(INSTALL) -m 0755 libjio_preload.so $(PREFIX)/lib
 
 clean:
 	rm -f libjio_preload.o libjio_preload.so
diff --git a/bindings/python/libjio.c b/bindings/python/libjio.c
index bd44992..46b257d 100644
--- a/bindings/python/libjio.c
+++ b/bindings/python/libjio.c
@@ -4,6 +4,7 @@
  * Alberto Bertogli (albertito@blitiri.com.ar)
  */
 
+#define PY_SSIZE_T_CLEAN 1
 
 #include <Python.h>
 
@@ -49,6 +50,10 @@ typedef struct {
 	PyObject_HEAD
 	jtrans_t *ts;
 	jfile_object *jfile;
+
+	/* add_r() allocates views which must be freed by the destructor */
+	Py_buffer **views;
+	size_t nviews;
 } jtrans_object;
 
 static PyTypeObject jtrans_type;
@@ -162,6 +167,106 @@ static PyObject *jf_pread(jfile_object *fp, PyObject *args)
 	return r;
 }
 
+/* readv */
+PyDoc_STRVAR(jf_readv__doc,
+"readv([buf1, buf2, ...])\n\
+\n\
+Reads the data from the file into the different buffers; returns the\n\
+number of bytes read.\n\
+The buffers must be objects that support slice assignment, like bytearray\n\
+(but *not* str).\n\
+Only available in Python >= 2.6.\n\
+It's a wrapper to jreadv().\n");
+
+/* readv requires the new Py_buffer interface, which is only available in
+ * Python >= 2.6 */
+#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION >= 6)
+static PyObject *jf_readv(jfile_object *fp, PyObject *args)
+{
+	long rv;
+	PyObject *buffers, *buf;
+	Py_buffer *views = NULL;
+	ssize_t len, pos = 0;
+	struct iovec *iov = NULL;
+
+	if (!PyArg_ParseTuple(args, "O:readv", &buffers))
+		return NULL;
+
+	len = PySequence_Length(buffers);
+	if (len < 0) {
+		PyErr_SetString(PyExc_TypeError, "iterable expected");
+		return NULL;
+	}
+
+	iov = malloc(sizeof(struct iovec) * len);
+	if (iov == NULL)
+		return PyErr_NoMemory();
+
+	views = malloc(sizeof(Py_buffer) * len);
+	if (views == NULL) {
+		free(iov);
+		return PyErr_NoMemory();
+	}
+
+	/* Take a contiguous view of each buffer and describe it in iov */
+	for (pos = 0; pos < len; pos++) {
+		buf = PySequence_GetItem(buffers, pos);
+		if (buf == NULL)
+			goto error;
+
+		rv = -1;	/* per-item status until jreadv() sets it */
+		if (PyObject_CheckBuffer(buf))
+			rv = PyObject_GetBuffer(buf, &(views[pos]),
+					PyBUF_CONTIG);
+		else
+			PyErr_SetString(PyExc_TypeError,
+				"object must support the buffer interface");
+
+		/* Drop the reference returned by PySequence_GetItem(); a view
+		 * that was filled in keeps its own reference to the object */
+		Py_DECREF(buf);
+		if (rv != 0)
+			goto error;
+
+		iov[pos].iov_base = views[pos].buf;
+		iov[pos].iov_len = views[pos].len;
+	}
+
+	Py_BEGIN_ALLOW_THREADS
+	rv = jreadv(fp->fs, iov, len);
+	Py_END_ALLOW_THREADS
+
+	for (pos = 0; pos < len; pos++)
+		PyBuffer_Release(&(views[pos]));
+	free(iov);
+	free(views);
+
+	if (rv < 0)
+		return PyErr_SetFromErrno(PyExc_IOError);
+
+	return PyLong_FromLong(rv);
+
+error:
+	/* We might get here with pos between 0 and len, so we must release
+	 * only what we have already taken */
+	while (--pos >= 0)
+		PyBuffer_Release(&(views[pos]));
+	free(iov);
+	free(views);
+	return NULL;
+}
+
+#else
+
+static PyObject *jf_readv(jfile_object *fp, PyObject *args)
+{
+	PyErr_SetString(PyExc_NotImplementedError,
+			"only supported in Python >= 2.6");
+	return NULL;
+}
+
+#endif /* python version >= 2.6 */
+
 /* write */
 PyDoc_STRVAR(jf_write__doc,
 "write(buf)\n\
@@ -217,6 +322,60 @@ static PyObject *jf_pwrite(jfile_object *fp, PyObject *args)
 	return PyLong_FromLong(rv);
 }
 
+/* writev */
+PyDoc_STRVAR(jf_writev__doc,
+"writev([buf1, buf2, ...])\n\
+\n\
+Writes the data contained in the different buffers to the file, returns the\n\
+number of bytes written.\n\
+The buffers must be strings or string-alike objects, like str or bytes.\n\
+It's a wrapper to jwritev().\n");
+
+static PyObject *jf_writev(jfile_object *fp, PyObject *args)
+{
+	long rv;
+	PyObject *buffers;
+	ssize_t len, pos;
+	struct iovec *iov;
+
+	if (!PyArg_ParseTuple(args, "O:writev", &buffers))
+		return NULL;
+
+	/* A private tuple keeps every item alive while iov points into it */
+	buffers = PySequence_Tuple(buffers);
+	if (buffers == NULL) {
+		PyErr_SetString(PyExc_TypeError, "iterable expected");
+		return NULL;
+	}
+	len = PyTuple_GET_SIZE(buffers);
+
+	iov = malloc(sizeof(struct iovec) * len);
+	if (iov == NULL) {
+		Py_DECREF(buffers);
+		return PyErr_NoMemory();
+	}
+
+	for (pos = 0; pos < len; pos++) {
+		iov[pos].iov_len = 0;
+		if (!PyArg_Parse(PyTuple_GET_ITEM(buffers, pos), "s#:writev",
+				&(iov[pos].iov_base), &(iov[pos].iov_len))) {
+			free(iov);
+			Py_DECREF(buffers);
+			return NULL;
+		}
+	}
+
+	Py_BEGIN_ALLOW_THREADS
+	rv = jwritev(fp->fs, iov, len);
+	Py_END_ALLOW_THREADS
+
+	free(iov);
+	Py_DECREF(buffers);
+	if (rv < 0)
+		return PyErr_SetFromErrno(PyExc_IOError);
+	return PyLong_FromLong(rv);
+}
+
 /* truncate */
 PyDoc_STRVAR(jf_truncate__doc,
 "truncate(lenght)\n\
@@ -385,8 +544,9 @@ It's a wrapper to jtrans_new().\n");
 static PyObject *jf_new_trans(jfile_object *fp, PyObject *args)
 {
 	jtrans_object *tp;
+	unsigned int flags = 0;
 
-	if (!PyArg_ParseTuple(args, ":new_trans"))
+	if (!PyArg_ParseTuple(args, "|I:new_trans", &flags))
 		return NULL;
 
 #ifdef PYTHON3
@@ -397,7 +557,7 @@ static PyObject *jf_new_trans(jfile_object *fp, PyObject *args)
 	if (tp == NULL)
 		return NULL;
 
-	tp->ts = jtrans_new(fp->fs);
+	tp->ts = jtrans_new(fp->fs, flags);
 	if(tp->ts == NULL) {
 		return PyErr_NoMemory();
 	}
@@ -406,6 +566,9 @@ static PyObject *jf_new_trans(jfile_object *fp, PyObject *args)
 	tp->jfile = fp;
 	Py_INCREF(fp);
 
+	tp->views = NULL;
+	tp->nviews = 0;
+
 	return (PyObject *) tp;
 }
 
@@ -414,8 +577,10 @@ static PyMethodDef jfile_methods[] = {
 	{ "fileno", (PyCFunction) jf_fileno, METH_VARARGS, jf_fileno__doc },
 	{ "read", (PyCFunction) jf_read, METH_VARARGS, jf_read__doc },
 	{ "pread", (PyCFunction) jf_pread, METH_VARARGS, jf_pread__doc },
+	{ "readv", (PyCFunction) jf_readv, METH_VARARGS, jf_readv__doc },
 	{ "write", (PyCFunction) jf_write, METH_VARARGS, jf_write__doc },
 	{ "pwrite", (PyCFunction) jf_pwrite, METH_VARARGS, jf_pwrite__doc },
+	{ "writev", (PyCFunction) jf_writev, METH_VARARGS, jf_writev__doc },
 	{ "truncate", (PyCFunction) jf_truncate, METH_VARARGS,
 		jf_truncate__doc },
 	{ "lseek", (PyCFunction) jf_lseek, METH_VARARGS, jf_lseek__doc },
@@ -471,34 +636,119 @@ static void jt_dealloc(jtrans_object *tp)
 		jtrans_free(tp->ts);
 	}
 	Py_DECREF(tp->jfile);
+
+	/* release views allocated by add_r */
+	while (tp->nviews) {
+		PyBuffer_Release(tp->views[tp->nviews - 1]);
+		free(tp->views[tp->nviews - 1]);
+		tp->nviews--;
+	}
+	free(tp->views);
+
 	PyObject_Del(tp);
 }
 
-/* add */
-PyDoc_STRVAR(jt_add__doc,
-"add(buf, offset)\n\
+/* add_w */
+PyDoc_STRVAR(jt_add_w__doc,
+"add_w(buf, offset)\n\
 \n\
 Add an operation to write the given buffer at the given offset to the\n\
 transaction.\n\
-It's a wrapper to jtrans_add().\n");
+It's a wrapper to jtrans_add_w().\n");
 
-static PyObject *jt_add(jtrans_object *tp, PyObject *args)
+static PyObject *jt_add_w(jtrans_object *tp, PyObject *args)
 {
 	long rv;
-	int len;
+	Py_ssize_t len;	/* "s#" stores a Py_ssize_t under PY_SSIZE_T_CLEAN */
 	long long offset;
 	unsigned char *buf;
 
-	if (!PyArg_ParseTuple(args, "s#L:add", &buf, &len, &offset))
+	if (!PyArg_ParseTuple(args, "s#L:add_w", &buf, &len, &offset))
 		return NULL;
 
-	rv = jtrans_add(tp->ts, buf, len, offset);
+	rv = jtrans_add_w(tp->ts, buf, len, offset);
 	if (rv < 0)
 		return PyErr_SetFromErrno(PyExc_IOError);
 
 	return PyLong_FromLong(rv);
 }
 
+/* add_r */
+PyDoc_STRVAR(jt_add_r__doc,
+"add_r(buf, offset)\n\
+\n\
+Add an operation to read into the given buffer at the given offset to the\n\
+transaction.\n\
+It's a wrapper to jtrans_add_r().\n\
+\n\
+The buffer must be an object that supports slice assignment, like bytearray\n\
+(but *not* str).\n\
+Only available in Python >= 2.6.\n");
+
+/* add_r requires the new Py_buffer interface, which is only available in
+ * Python >= 2.6 */
+#if PY_MAJOR_VERSION >= 3 || (PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION >= 6)
+static PyObject *jt_add_r(jtrans_object *tp, PyObject *args)
+{
+	long rv;
+	PyObject *py_buf;
+	unsigned long long offset;
+	Py_buffer *view = NULL, **new_views;
+
+	if (!PyArg_ParseTuple(args, "OL:add_r", &py_buf, &offset))
+		return NULL;
+
+	if (!PyObject_CheckBuffer(py_buf)) {
+		PyErr_SetString(PyExc_TypeError,
+			"object must support the buffer interface");
+		return NULL;
+	}
+
+	view = malloc(sizeof(Py_buffer));
+	if (view == NULL)
+		return PyErr_NoMemory();
+
+	if (PyObject_GetBuffer(py_buf, view, PyBUF_CONTIG)) {
+		free(view);
+		return NULL;
+	}
+
+	Py_BEGIN_ALLOW_THREADS
+	rv = jtrans_add_r(tp->ts, view->buf, view->len, offset);
+	Py_END_ALLOW_THREADS
+	if (rv < 0) {
+		PyBuffer_Release(view);
+		free(view);
+		return PyErr_SetFromErrno(PyExc_IOError);
+	}
+
+	/* NOTE(review): if this fails, the operation added above still points
+	 * at view->buf, which is released below -- confirm this is acceptable */
+	new_views = realloc(tp->views, sizeof(Py_buffer *) * (tp->nviews + 1));
+	if (new_views == NULL) {
+		PyBuffer_Release(view);
+		free(view);
+		return PyErr_NoMemory();
+	}
+
+	tp->views = new_views;
+	tp->views[tp->nviews++] = view;	/* released by jt_dealloc() */
+
+	return PyLong_FromLong(rv);
+}
+
+#else
+
+static PyObject *jt_add_r(jtrans_object *tp, PyObject *args)
+{
+	PyErr_SetString(PyExc_NotImplementedError,
+			"only supported in Python >= 2.6");
+	return NULL;
+}
+
+#endif /* python version >= 2.6 */
+
+
 /* commit */
 PyDoc_STRVAR(jt_commit__doc,
 "commit()\n\
@@ -549,7 +799,8 @@ static PyObject *jt_rollback(jtrans_object *tp, PyObject *args)
 
 /* method table */
 static PyMethodDef jtrans_methods[] = {
-	{ "add", (PyCFunction) jt_add, METH_VARARGS, jt_add__doc },
+	{ "add_r", (PyCFunction) jt_add_r, METH_VARARGS, jt_add_r__doc },
+	{ "add_w", (PyCFunction) jt_add_w, METH_VARARGS, jt_add_w__doc },
 	{ "commit", (PyCFunction) jt_commit, METH_VARARGS, jt_commit__doc },
 	{ "rollback", (PyCFunction) jt_rollback, METH_VARARGS, jt_rollback__doc },
 	{ NULL }
@@ -600,14 +851,16 @@ It's a wrapper to jopen().\n");
 static PyObject *jf_open(PyObject *self, PyObject *args)
 {
 	char *file;
-	int flags, mode, jflags;
+	int flags = O_RDONLY;
+	int mode = 0600;
+	unsigned int jflags = 0;
 	jfile_object *fp;
 
 	flags = O_RDWR;
 	mode = 0600;
 	jflags = 0;
 
-	if (!PyArg_ParseTuple(args, "s|iii:open", &file, &flags, &mode,
+	if (!PyArg_ParseTuple(args, "s|iiI:open", &file, &flags, &mode,
 				&jflags))
 		return NULL;
 
@@ -679,7 +932,6 @@ static PyObject *jf_jfsck(PyObject *self, PyObject *args, PyObject *kw)
 	PyDict_SetItemString(dict, "in_progress", PyLong_FromLong(res.in_progress));
 	PyDict_SetItemString(dict, "broken", PyLong_FromLong(res.broken));
 	PyDict_SetItemString(dict, "corrupt", PyLong_FromLong(res.corrupt));
-	PyDict_SetItemString(dict, "apply_error", PyLong_FromLong(res.apply_error));
 	PyDict_SetItemString(dict, "reapplied", PyLong_FromLong(res.reapplied));
 
 	return dict;
@@ -717,10 +969,17 @@ static void populate_module(PyObject *m)
 	PyModule_AddIntConstant(m, "J_ROLLBACKED", J_ROLLBACKED);
 	PyModule_AddIntConstant(m, "J_ROLLBACKING", J_ROLLBACKING);
 	PyModule_AddIntConstant(m, "J_RDONLY", J_RDONLY);
+
+	/* enum jfsck_return */
 	PyModule_AddIntConstant(m, "J_ESUCCESS", J_ESUCCESS);
 	PyModule_AddIntConstant(m, "J_ENOENT", J_ENOENT);
 	PyModule_AddIntConstant(m, "J_ENOJOURNAL", J_ENOJOURNAL);
 	PyModule_AddIntConstant(m, "J_ENOMEM", J_ENOMEM);
+	PyModule_AddIntConstant(m, "J_ECLEANUP", J_ECLEANUP);
+	PyModule_AddIntConstant(m, "J_EIO", J_EIO);
+
+	/* jfsck() flags */
+	PyModule_AddIntConstant(m, "J_CLEANUP", J_CLEANUP);
 
 	/* open constants (at least the POSIX ones) */
 	PyModule_AddIntConstant(m, "O_RDONLY", O_RDONLY);
diff --git a/bindings/python/setup.py b/bindings/python/setup.py
index a7a077b..eea600f 100644
--- a/bindings/python/setup.py
+++ b/bindings/python/setup.py
@@ -20,7 +20,7 @@ libjio = Extension("libjio",
 
 setup(
 	name = 'libjio',
-	version = '0.51',
+	version = '0.90',
 	description = "A library for journaled, transactional I/O",
 	author = "Alberto Bertogli",
 	author_email = "albertito@blitiri.com.ar",
diff --git a/doc/guide.rst b/doc/guide.rst
index 5202ff2..df9e939 100644
--- a/doc/guide.rst
+++ b/doc/guide.rst
@@ -7,8 +7,8 @@ Introduction
 
 This small document attempts to serve as a guide to the programmer who wants
 to use the library. It's not a replacement for the man page or reading the
-code; but it's a good starting point for everyone who wants to get involved
-with it.
+code, but is a good starting point for everyone who wants to get involved with
+it.
 
 The library is not complex to use at all, and the interfaces were designed to
 be as intuitive as possible, so the text is structured as a guide to present
@@ -27,7 +27,7 @@ the form of a directory with files on it) to guarantee coherency even after a
 crash at any point.
 
 In this document, we think of a transaction as a list of *(buffer, length,
-offset)* to be applied to a file. That triplet is called an *operation*, so we
+offset)* to be written to a file. That triplet is called an *operation*, so we
 can say that a transaction represents an ordered group of operations on the
 same file.
 
@@ -75,9 +75,9 @@ Now that you have opened a file, the next thing to do would be to create a
 transaction. This is what *jtrans_new()* is for: it takes a file structure and
 returns a new transaction structure.
 
-To add an operation to the transaction, use *jtrans_add()*. You can add as
-many operations as you want. Operations within a transaction may overlap, and
-will be applied in order.
+To add a write operation to the transaction, use *jtrans_add_w()*. You can add
+as many operations as you want. Operations within a transaction may overlap,
+and will be applied in order.
 
 Finally, to apply our transaction to the file, use *jtrans_commit()*.
 
@@ -92,8 +92,8 @@ are ignored for simplicity)::
 
   file = jopen("filename", O_RDWR | O_CREAT, 0600, 0);
 
-  trans = jtrans_new(file);
-  jtrans_add(trans, buf, strlen(buf), 0);
+  trans = jtrans_new(file, 0);
+  jtrans_add_w(trans, buf, strlen(buf), 0);
   jtrans_commit(trans);
   jtrans_free(trans);
 
@@ -101,7 +101,7 @@ are ignored for simplicity)::
 
 As we've seen, you open the file and initialize the structure with *jopen()*
 (with the parameter *jflags* being the last 0), create a new transaction with
-*jtrans_new()*, then add an operation with *jtrans_add()* (the last 0 is the
+*jtrans_new()*, then add an operation with *jtrans_add_w()* (the last 0 is the
 offset, in this case the beginning of the file), commit the transaction with
 *jtrans_commit()*, free it with *jtrans_free()*, and finally close the file
 with *jclose()*.
@@ -111,6 +111,9 @@ Reading is much easier: the library provides three functions, *jread()*,
 *readv()*, except that they play safe with libjio's writing code. You should
 use these to read from files when using libjio.
 
+You can also add read operations to a transaction using *jtrans_add_r()*, and
+the data will be read atomically at commit time.
+
 
 Integrity checking and recovery
 -------------------------------
@@ -168,21 +171,23 @@ here; however the naming is quite simple: just prepend a 'j' to all the names:
 *jread()*, *jwrite()*, etc.
 
 
-Threads and locking
--------------------
+Processes, threads and locking
+------------------------------
 
-The library is completely safe to use in multithreaded applications; however,
-there are some very basic and intuitive locking rules you have to bear in
-mind.
+The library is completely safe to use in multi-process and/or multi-thread
+applications, as long as you abide by the following rules:
 
-You need to care only when closing and checking for integrity. In
-practise, that means that you shouldn't call *jclose()* in the middle of an
-I/O operation, just like you do when using the normal UNIX calls. In the case
-of *jfsck()*, you shouldn't invoke it for the same file more than once at the
-time, or when the file is open by any other process (this requirement will be
-lifted in future releases).
+ - Within a process, a file must not be held open at the same time more than
+   once, due to *fcntl()* locking limitations. Opening, closing and then
+   opening again is safe.
+ - *jclose()* must only be called when there are no other I/O operations in
+   progress.
+ - *jfsck()* must only be called when the file is known **not** to be open by
+   any process.
+ - *jmove_journal()* must only be called when the file is known **not** to be
+   open by any other processes.
 
-All other operations (commiting a transaction, rolling it back, adding
+All other operations (committing a transaction, rolling it back, adding
 operations, etc.) and all the wrappers are safe and don't require any special
 considerations.
 
@@ -195,7 +200,9 @@ this mode, transactions take up more disk space but allows you to do the
 synchronous write only once, making commits much faster. To use them, just add
 *J_LINGER* to the *jflags* parameter in *jopen()*. You should call *jsync()*
 frequently to avoid using up too much space, or start an asynchronous thread
-that calls *jsync()* automatically using *jfs_autosync_start()*.
+that calls *jsync()* automatically using *jfs_autosync_start()*. Note that
+files opened with this mode must not be opened by more than one process at the
+same time.
 
 
 Disk layout
@@ -226,13 +233,18 @@ if you need them.
 Compiling and linking
 ---------------------
 
-When you want to use your library, besides including the "libjio.h" header,
-you have to make sure your application uses the Large File Support ("LFS" from
-now on), to be able to handle large files properly. This means that you will
-have to pass some special standard flags to the compiler, so your C library
-uses the same data types as the library. For instance, on 32-bit platforms
-(like x86), when using LFS, offsets are usually 64 bits, as opposed to the
-usual 32.
+If you have *pkg-config* in your build environment, then you can get the build
+flags you need to use when building and linking against the library by
+running::
+
+  pkg-config --cflags --libs libjio
+
+If *pkg-config* is not available, you have to make sure your application uses
+the Large File Support (*"LFS"* from now on), to be able to handle large files
+properly. This means that you will have to pass some special standard flags to
+the compiler, so your C library uses the same data types as the library. For
+instance, on 32-bit platforms (like x86), when using LFS, offsets are usually
+64 bits, as opposed to the usual 32.
 
 The library is always built with LFS; however, linking it against an
 application without LFS support could lead to serious problems because this
diff --git a/doc/libjio.rst b/doc/libjio.rst
index 6bc4f3c..c166bf4 100644
--- a/doc/libjio.rst
+++ b/doc/libjio.rst
@@ -59,20 +59,20 @@ careful when doing strange things with files while working on them.
 The transaction file
 ~~~~~~~~~~~~~~~~~~~~
 
-The transaction file is composed of two main parts: the header and the
-payload.
+The transaction file is composed of three main parts: the header, the
+operations, and the trailer.
 
 The header holds basic information about the transaction itself, including the
-ID, some flags, and the amount of operations it includes. Then the payload has
-all the operations one after the other, divided in two parts: the first one
-includes static information about the operation (the length of the data, the
-offset of the file where it should be applied, etc.) and the data itself,
-which is saved by the library prior applying the commit, so transactions can
-be rollbacked.
+version, the transaction ID, and its flags.
 
-At the end of the transaction file, a checksum is stored, to detect journal
-corruption.
+Then the operation part has all the operations one after the other, prepending
+the operation data with a per-operation header that includes the length of the
+data and the offset of the file where it should be applied, and then the data
+itself.
 
+Finally, the trailer contains the number of operations included in it and a
+checksum of the whole file. Both fields are used to detect broken or corrupted
+transactions.
 
 The commit procedure
 --------------------
diff --git a/libjio/Makefile b/libjio/Makefile
index 8ce1ca1..2e25ca3 100644
--- a/libjio/Makefile
+++ b/libjio/Makefile
@@ -17,10 +17,12 @@ LIBS = -lpthread $(NEED_LIBRT)
 
 ifdef DEBUG
 ALL_CFLAGS += -g
+ALL_LDFLAGS += -g
 endif
 
 ifdef PROFILE
 ALL_CFLAGS += -g -pg -fprofile-arcs -ftest-coverage
+ALL_LDFLAGS += -g -pg -fprofile-arcs -ftest-coverage -lgcov
 endif
 
 ifdef FI
@@ -31,6 +33,11 @@ endif
 # prefix for installing the binaries
 PREFIX = /usr/local
 
+# prefix for eventual location of binaries
+DESTDIR=$(PREFIX)
+
+# install utility, we assume it's GNU/BSD compatible
+INSTALL=install
 
 ifneq ($(V), 1)
         NICE_CC = @echo "  CC  $@"; $(CC)
@@ -41,6 +48,10 @@ else
 endif
 
 
+LIB_VER=0.90
+LIB_SO_VER=0
+
+
 # objects to build
 OBJS = autosync.o checksum.o common.o compat.o trans.o check.o journal.o \
        unix.o ansi.o
@@ -50,49 +61,67 @@ default: all
 
 all: libjio.so libjio.a libjio.pc jiofsck
 
-libjio.so: $(OBJS)
-	$(NICE_CC) -shared $(ALL_LDFLAGS) $(LIBS) $(OBJS) -o libjio.so
+libjio.so: build-flags $(OBJS)
+	$(NICE_CC) -shared $(ALL_LDFLAGS) \
+		-Wl,-soname,libjio.so.$(LIB_SO_VER) \
+		$(LIBS) $(OBJS) -o libjio.so.$(LIB_VER)
+	ln -fs libjio.so.$(LIB_VER) libjio.so
 
-libjio.a: $(OBJS)
+libjio.a: build-flags $(OBJS)
 	$(NICE_AR) cr libjio.a $(OBJS)
 
-libjio.pc: libjio.skel.pc
+libjio.pc: build-flags libjio.pc.in
 	@echo "generating libjio.pc"
-	@cat libjio.skel.pc | \
-		sed 's@++PREFIX++@$(PREFIX)@g' | \
+	@cat libjio.pc.in | \
+		sed 's@++PREFIX++@$(DESTDIR)@g' | \
+		sed 's@++VERSION++@$(LIB_VER)@g' | \
 		sed 's@++CFLAGS++@$(MANDATORY_CFLAGS)@g' \
 		> libjio.pc
 
-jiofsck: jiofsck.o libjio.a
+jiofsck: build-flags jiofsck.o libjio.a
 	$(NICE_CC) $(ALL_LDFLAGS) jiofsck.o libjio.a $(LIBS) -o jiofsck
 
 install: all
-	install -d $(PREFIX)/lib
-	install -m 0755 libjio.so $(PREFIX)/lib
-	install -m 0644 libjio.a $(PREFIX)/lib
-	install -d $(PREFIX)/include
-	install -m 0644 libjio.h $(PREFIX)/include
-	install -d $(PREFIX)/lib/pkgconfig
-	install -m 644 libjio.pc $(PREFIX)/lib/pkgconfig
-	install -d $(PREFIX)/bin
-	install -m 0775 jiofsck $(PREFIX)/bin
-	install -d $(PREFIX)/man/man3
-	install -m 0644 libjio.3 $(PREFIX)/man/man3/
+	$(INSTALL) -d $(PREFIX)/lib
+	$(INSTALL) -m 0755 libjio.so.$(LIB_VER) $(PREFIX)/lib
+	ln -fs libjio.so.$(LIB_VER) $(PREFIX)/lib/libjio.so
+	ln -fs libjio.so.$(LIB_VER) $(PREFIX)/lib/libjio.so.$(LIB_SO_VER)
+	$(INSTALL) -m 0644 libjio.a $(PREFIX)/lib
+	$(INSTALL) -d $(PREFIX)/include
+	$(INSTALL) -m 0644 libjio.h $(PREFIX)/include
+	$(INSTALL) -d $(PREFIX)/lib/pkgconfig
+	$(INSTALL) -m 644 libjio.pc $(PREFIX)/lib/pkgconfig
+	$(INSTALL) -d $(PREFIX)/bin
+	$(INSTALL) -m 0775 jiofsck $(PREFIX)/bin
+	$(INSTALL) -d $(PREFIX)/share/man/man3
+	$(INSTALL) -m 0644 libjio.3 $(PREFIX)/share/man/man3/
 	@echo
 	@echo "Please run ldconfig to update your library cache"
 	@echo
 
+BF = $(ALL_CFLAGS) ~ $(PREFIX)
+build-flags: .force-build-flags
+	@if [ x"$(BF)" != x"`cat build-flags 2>/dev/null`" ]; then \
+		if [ -f build-flags ]; then \
+			echo "build flags changed, rebuilding"; \
+		fi; \
+		echo "$(BF)" > build-flags; \
+	fi
+
+$(OBJS): build-flags
+
 .c.o:
 	$(NICE_CC) $(ALL_CFLAGS) -c $< -o $@
 
 doxygen:
-	$(MAKE) -C doxygen
+	$(MAKE) LIB_VER=$(LIB_VER) -C doxygen
 
 clean:
-	rm -f $(OBJS) libjio.a libjio.so libjio.pc jiofsck.o jiofsck
-	rm -f *.bb *.bbg *.da *.gcov *.gcno *.gcda gmon.out
+	rm -f libjio.a libjio.so libjio.so.$(LIB_VER) libjio.pc
+	rm -f $(OBJS) jiofsck.o jiofsck
+	rm -f *.bb *.bbg *.da *.gcov *.gcno *.gcda gmon.out build-flags
 	$(MAKE) -C doxygen $@
 
 
-.PHONY: default all install clean doxygen
+.PHONY: default all install clean doxygen .force-build-flags
 
diff --git a/libjio/autosync.c b/libjio/autosync.c
index edc6ec3..ad8b48c 100644
--- a/libjio/autosync.c
+++ b/libjio/autosync.c
@@ -11,6 +11,8 @@
 
 #include "common.h"
 #include "libjio.h"
+#include "compat.h"
+
 
 /** Configuration of an autosync thread */
 struct autosync_cfg {
diff --git a/libjio/check.c b/libjio/check.c
index 03458ab..f3e37e2 100644
--- a/libjio/check.c
+++ b/libjio/check.c
@@ -17,119 +17,40 @@
 
 #include "libjio.h"
 #include "common.h"
+#include "journal.h"
 #include "trans.h"
 
 
-/** Fill a transaction structure from a mmapped transaction file */
-static off_t fill_trans(unsigned char *map, off_t len, struct jtrans *ts)
-{
-	int i;
-	unsigned char *p;
-	struct joper *op, *tmp;
-	off_t translen;
-
-	if (len < J_DISKHEADSIZE)
-		return 0;
-
-	p = map;
-
-	ts->id = *( (uint32_t *) p);
-	p += 4;
-
-	ts->flags = *( (uint32_t *) p);
-	p += 4;
-
-	ts->numops = *( (uint32_t *) p);
-	p += 4;
-
-	translen = J_DISKHEADSIZE;
-
-	for (i = 0; i < ts->numops; i++) {
-		if (p + J_DISKOPHEADSIZE > map + len)
-			goto error;
-
-		op = malloc(sizeof(struct joper));
-		if (op == NULL)
-			goto error;
-
-		op->len = *( (uint32_t *) p);
-		p += 4;
-
-		op->plen = *( (uint32_t *) p);
-		p += 4;
-
-		op->offset = *( (uint64_t *) p);
-		p += 8;
-
-		if (p + op->len > map + len)
-			goto error;
-
-		op->buf = (void *) p;
-		p += op->len;
-
-		op->pdata = NULL;
-
-		if (ts->op == NULL) {
-			ts->op = op;
-			op->prev = NULL;
-			op->next = NULL;
-		} else {
-			for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next)
-				;
-			tmp->next = op;
-			op->prev = tmp;
-			op->next = NULL;
-		}
-
-		translen += J_DISKOPHEADSIZE + op->len;
-	}
-
-	return translen;
-
-error:
-	while (ts->op != NULL) {
-		tmp = ts->op->next;
-		free(ts->op);
-		ts->op = tmp;
-	}
-	return 0;
-}
-
-/** Remove all the files in the journal directory (if any).
+/** Remove the journal directory (if it's clean).
  *
  * @param name path to the file
- * @param jdir path to the journal directory, use NULL for the default
+ * @param jdir path to the journal directory
  * @returns 0 on success, < 0 on error
  */
 static int jfsck_cleanup(const char *name, const char *jdir)
 {
-	char path[PATH_MAX], tfile[PATH_MAX*3];
+	char tfile[PATH_MAX*3];
 	DIR *dir;
 	struct dirent *dent;
 
-	if (jdir == NULL) {
-		if (!get_jdir(name, path))
-			return -1;
-	} else {
-		strcpy(path, jdir);
-	}
-
-	dir = opendir(path);
+	dir = opendir(jdir);
 	if (dir == NULL && errno == ENOENT)
 		/* it doesn't exist, so it's clean */
 		return 0;
 	else if (dir == NULL)
 		return -1;
 
-	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
-		/* we only care about transactions (named as numbers > 0) and
-		 * the lockfile (named "lock"); ignore everything else */
-		if (strcmp(dent->d_name, "lock") && atoi(dent->d_name) <= 0)
+	for (errno = 0, dent = readdir(dir); dent != NULL;
+			errno = 0, dent = readdir(dir)) {
+		/* We only care about files we know, and ignore everything
+		 * else. Note that transactions should have been removed by
+		 * jfsck(), we will not do it to prevent accidental misuse */
+		if (strcmp(dent->d_name, "lock"))
 			continue;
 
 		/* build the full path to the transaction file */
 		memset(tfile, 0, PATH_MAX * 3);
-		strcat(tfile, path);
+		strcat(tfile, jdir);
 		strcat(tfile, "/");
 		strcat(tfile, dent->d_name);
 
@@ -143,10 +64,16 @@ static int jfsck_cleanup(const char *name, const char *jdir)
 			return -1;
 		}
 	}
+
+	if (errno) {
+		closedir(dir);
+		return -1;
+	}
+
 	if (closedir(dir) != 0)
 		return -1;
 
-	if (rmdir(path) != 0)
+	if (rmdir(jdir) != 0)
 		return -1;
 
 	return 0;
@@ -158,16 +85,15 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 {
 	int tfd, rv, i, ret;
 	unsigned int maxtid;
-	uint32_t csum1, csum2;
-	char jlockfile[PATH_MAX], tname[PATH_MAX];
+	char jlockfile[PATH_MAX], tname[PATH_MAX], brokenname[PATH_MAX];
 	struct stat sinfo;
 	struct jfs fs;
 	struct jtrans *curts;
-	struct joper *tmpop;
+	struct operation *tmpop;
 	DIR *dir;
 	struct dirent *dent;
 	unsigned char *map;
-	off_t filelen, translen, lr;
+	off_t filelen, lr;
 
 	tfd = -1;
 	filelen = 0;
@@ -185,12 +111,13 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 	res->in_progress = 0;
 	res->broken = 0;
 	res->corrupt = 0;
-	res->apply_error = 0;
 	res->reapplied = 0;
 
 	fs.fd = open(name, O_RDWR | O_SYNC);
 	if (fs.fd < 0) {
-		ret = J_ENOENT;
+		ret = J_EIO;
+		if (errno == ENOENT)
+			ret = J_ENOENT;
 		goto exit;
 	}
 
@@ -216,14 +143,22 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 	}
 
 	rv = lstat(fs.jdir, &sinfo);
-	if (rv < 0 || !S_ISDIR(sinfo.st_mode)) {
+	if (rv < 0) {
+		ret = J_EIO;
+		if (errno == ENOENT)
+			ret = J_ENOJOURNAL;
+		goto exit;
+	}
+	if (!S_ISDIR(sinfo.st_mode)) {
 		ret = J_ENOJOURNAL;
 		goto exit;
 	}
 
 	fs.jdirfd = open(fs.jdir, O_RDONLY);
 	if (fs.jdirfd < 0) {
-		ret = J_ENOJOURNAL;
+		ret = J_EIO;
+		if (errno == ENOENT)
+			ret = J_ENOJOURNAL;
 		goto exit;
 	}
 
@@ -232,7 +167,9 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 	snprintf(jlockfile, PATH_MAX, "%s/%s", fs.jdir, "lock");
 	rv = open(jlockfile, O_RDWR | O_CREAT, 0600);
 	if (rv < 0) {
-		ret = J_ENOJOURNAL;
+		ret = J_EIO;
+		if (errno == ENOENT)
+			ret = J_ENOJOURNAL;
 		goto exit;
 	}
 	fs.jfd = rv;
@@ -240,20 +177,23 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 	fs.jmap = (unsigned int *) mmap(NULL, sizeof(unsigned int),
 			PROT_READ | PROT_WRITE, MAP_SHARED, fs.jfd, 0);
 	if (fs.jmap == MAP_FAILED) {
-		ret = J_ENOJOURNAL;
+		ret = J_EIO;
 		goto exit;
 	}
 
 	dir = opendir(fs.jdir);
 	if (dir == NULL) {
-		ret = J_ENOJOURNAL;
+		ret = J_EIO;
+		if (errno == ENOENT)
+			ret = J_ENOJOURNAL;
 		goto exit;
 	}
 
 	/* find the greatest transaction number by looking into the journal
 	 * directory */
 	maxtid = 0;
-	for (dent = readdir(dir); dent != NULL; dent = readdir(dir)) {
+	for (errno = 0, dent = readdir(dir); dent != NULL;
+			errno = 0, dent = readdir(dir)) {
 		/* see if the file is named like a transaction, ignore
 		 * otherwise; as transactions are named as numbers > 0, a
 		 * simple atoi() is enough testing */
@@ -263,6 +203,10 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 		if (rv > maxtid)
 			maxtid = rv;
 	}
+	if (errno) {
+		ret = J_EIO;
+		goto exit;
+	}
 
 	/* rewrite the lockfile, writing the new maxtid on it, so that when we
 	 * rollback a transaction it doesn't step over existing ones */
@@ -272,9 +216,22 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 		goto exit;
 	}
 
+	/* remove the broken mark so we can call jtrans_commit() */
+	snprintf(brokenname, PATH_MAX, "%s/broken", fs.jdir);
+	rv = access(brokenname, F_OK);
+	if (rv == 0) {
+		if (unlink(brokenname) != 0) {
+			ret = J_EIO;
+			goto exit;
+		}
+	} else if (errno != ENOENT) {
+		ret = J_EIO;
+		goto exit;
+	}
+
 	/* verify (and possibly fix) all the transactions */
 	for (i = 1; i <= maxtid; i++) {
-		curts = jtrans_new(&fs);
+		curts = jtrans_new(&fs, 0);
 		if (curts == NULL) {
 			ret = J_ENOMEM;
 			goto exit;
@@ -289,8 +246,13 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 		get_jtfile(&fs, i, tname);
 		tfd = open(tname, O_RDWR | O_SYNC, 0600);
 		if (tfd < 0) {
-			res->invalid++;
-			goto loop;
+			if (errno == ENOENT) {
+				res->invalid++;
+				goto nounlink_loop;
+			} else {
+				ret = J_EIO;
+				goto exit;
+			}
 		}
 
 		/* try to lock the transaction file, if it's locked then it is
@@ -302,31 +264,28 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 		}
 
 		filelen = lseek(tfd, 0, SEEK_END);
+		if (filelen == 0) {
+			res->broken++;
+			goto loop;
+		} else if (filelen < 0) {
+			ret = J_EIO;
+			goto exit;
+		}
+
 		/* no overflow problems because we know the transaction size
 		 * is limited to SSIZE_MAX */
 		map = mmap((void *) 0, filelen, PROT_READ, MAP_SHARED, tfd, 0);
 		if (map == MAP_FAILED) {
-			res->broken++;
 			map = NULL;
-			goto loop;
-		}
-		translen = fill_trans(map, filelen, curts);
-		if (translen == 0) {
-			res->broken++;
-			goto loop;
+			ret = J_EIO;
+			goto exit;
 		}
 
-		/* see if there's enough room for the checksum after the
-		 * transaction information */
-		if (filelen != translen + sizeof(uint32_t)) {
+		rv = fill_trans(map, filelen, curts);
+		if (rv == -1) {
 			res->broken++;
 			goto loop;
-		}
-
-		/* verify the checksum */
-		csum1 = checksum_map(map, filelen - (sizeof(uint32_t)));
-		csum2 = * (uint32_t *) (map + filelen - (sizeof(uint32_t)));
-		if (csum1 != csum2) {
+		} else if (rv == -2) {
 			res->corrupt++;
 			goto loop;
 		}
@@ -338,12 +297,18 @@ enum jfsck_return jfsck(const char *name, const char *jdir,
 		rv = jtrans_commit(curts);
 
 		if (rv < 0) {
-			res->apply_error++;
-			goto loop;
+			ret = J_EIO;
+			goto exit;
 		}
 		res->reapplied++;
 
 loop:
+		if (unlink(tname) != 0) {
+			ret = J_EIO;
+			goto exit;
+		}
+
+nounlink_loop:
 		if (tfd >= 0) {
 			close(tfd);
 			tfd = -1;
@@ -364,8 +329,8 @@ loop:
 		res->total++;
 	}
 
-	if ( !(flags & J_NOCLEANUP) ) {
-		if (jfsck_cleanup(name, jdir) < 0) {
+	if (flags & J_CLEANUP) {
+		if (jfsck_cleanup(name, fs.jdir) < 0) {
 			ret = J_ECLEANUP;
 		}
 	}
@@ -385,6 +350,5 @@ exit:
 		munmap(fs.jmap, sizeof(unsigned int));
 
 	return ret;
-
 }
 
diff --git a/libjio/checksum.c b/libjio/checksum.c
index 250a96d..252c03f 100644
--- a/libjio/checksum.c
+++ b/libjio/checksum.c
@@ -1,7 +1,7 @@
 
 /*
  * Checksum functions
- * Based on RFC 1071, "Computing the Internet Checksum"
+ * Uses CRC32c, just because it's decent enough. As defined in RFC 3309.
  */
 
 #include <stddef.h>
@@ -9,40 +9,85 @@
 #include <sys/mman.h>
 #include "common.h"
 
-
-/** Reads the contents of the given fd, up to len bytes, and stores the
- * checksum in csum. Returns 1 on success, 0 on error. */
-int checksum(int fd, size_t len, uint32_t *csum)
+static uint32_t table[256] =
 {
-	uint8_t *map;
-
-	map = (uint8_t *) mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
-	if (map == MAP_FAILED)
-		return 0;
+	0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+	0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+	0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+	0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+	0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+	0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+	0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+	0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+	0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+	0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+	0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+	0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+	0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+	0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+	0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+	0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+	0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+	0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+	0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+	0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+	0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+	0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+	0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+	0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+	0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+	0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+	0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+	0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+	0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+	0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+	0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+	0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+	0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+	0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+	0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+	0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+	0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+	0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+	0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+	0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+	0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+	0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+	0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+	0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+	0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+	0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+	0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+	0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+	0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+	0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+	0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+	0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+	0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+	0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+	0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+	0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+	0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+	0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+	0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+	0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+	0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+	0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+	0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+	0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L,
+};
 
-	*csum = checksum_map(map, len);
-
-	munmap(map, len);
-	return 1;
-}
-
-/** Calculates the checksum of the given buffer, up to count bytes. Returns
- * the checksum. */
-uint32_t checksum_map(uint8_t *map, size_t count)
+/** Calculates the checksum of the given buffer, up to count bytes. Returns the
+ * checksum. The initial crc32 must be 0. */
+uint32_t checksum_buf(uint32_t crc32, const unsigned char *buf, size_t count)
 {
-	uint32_t sum = 0;
+	crc32 = ~crc32;
 
-	while( count > 1 )  {
-		sum += * (uint16_t *) map++;
-		count -= 2;
+	while (count--) {
+		crc32 = (crc32 >> 8) ^ table[(crc32 ^ *buf) & 0xFFL];
+		buf++;
 	}
 
-	if( count > 0 )
-		sum += * (uint8_t *) map;
-
-	while (sum >> 16)
-		sum = (sum & 0xffff) + (sum >> 16);
-
-	return ~sum;
+	return ~crc32;
 }
 
diff --git a/libjio/common.c b/libjio/common.c
index 7a3cdf9..30b7e93 100644
--- a/libjio/common.c
+++ b/libjio/common.c
@@ -11,6 +11,7 @@
 #include <limits.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <arpa/inet.h>		/* htonl() and friends */
 
 #include "libjio.h"
 #include "common.h"
@@ -51,17 +52,15 @@ off_t plockf(int fd, int cmd, off_t offset, off_t len)
  * less than count it's because EOF was reached */
 ssize_t spread(int fd, void *buf, size_t count, off_t offset)
 {
-	int rv, c;
+	ssize_t rv;
+	size_t c;
 
 	c = 0;
 
 	while (c < count) {
 		rv = pread(fd, (char *) buf + c, count - c, offset + c);
 
-		if (rv == count)
-			/* we're done */
-			return count;
-		else if (rv < 0)
+		if (rv < 0)
 			/* error */
 			return rv;
 		else if (rv == 0)
@@ -78,18 +77,15 @@ ssize_t spread(int fd, void *buf, size_t count, off_t offset)
 /** Like spread() but for pwrite() */
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset)
 {
-	int rv, c;
+	ssize_t rv;
+	size_t c;
 
 	c = 0;
 
 	while (c < count) {
 		rv = pwrite(fd, (char *) buf + c, count - c, offset + c);
 
-		if (rv == count)
-			/* we're done */
-			return count;
-		else if (rv <= 0)
-			/* error/nothing was written */
+		if (rv < 0)
 			return rv;
 
 		/* incomplete write, keep on writing */
@@ -99,6 +95,53 @@ ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset)
 	return count;
 }
 
+/** Like writev() but either fails, or returns a complete write.
+ * Note that, as opposed to writev(), it WILL MODIFY iov, in particular the
+ * iov_base and iov_len fields. */
+ssize_t swritev(int fd, struct iovec *iov, int iovcnt)
+{
+	int i;
+	ssize_t rv;
+	size_t c, t, total;
+
+	total = 0;
+	for (i = 0; i < iovcnt; i++)
+		total += iov[i].iov_len;
+
+	c = 0;
+	while (c < total) {
+		rv = writev(fd, iov, iovcnt);
+
+		if (rv < 0)
+			return rv;
+
+		c += rv;
+
+		/* avoid going into the complex calculations for the common
+		 * case of writev() doing a complete write */
+		if (c == total)
+			break;
+
+		/* incomplete write, advance iov and try again */
+		t = 0;
+		for (i = 0; i < iovcnt; i++) {
+			if (t + iov[i].iov_len > rv) {
+				iov[i].iov_base = (unsigned char *)
+					iov[i].iov_base + rv - t;
+				iov[i].iov_len -= rv - t;
+				break;
+			} else {
+				t += iov[i].iov_len;
+			}
+		}
+
+		iovcnt -= i;
+		iov = iov + i;
+	}
+
+	return c;
+}
+
 /** Store in jdir the default journal directory path of the given filename */
 int get_jdir(const char *filename, char *jdir)
 {
@@ -133,3 +176,56 @@ void get_jtfile(struct jfs *fs, unsigned int tid, char *jtfile)
 }
 
 
+/* The ntohll() and htonll() functions are not standard, so we define them
+ * using an UGLY trick because there is no standard way to check for
+ * endianness at runtime. */
+
+/** Convert a 64-bit value between network byte order and host byte order. */
+uint64_t ntohll(uint64_t x)
+{
+	static int endianness = 0;
+
+	/* determine the endianness by checking how htonl() behaves; use -1
+	 * for little endian and 1 for big endian */
+	if (endianness == 0) {
+		if (htonl(1) == 1)
+			endianness = 1;
+		else
+			endianness = -1;
+	}
+
+	if (endianness == 1) {
+		/* big endian */
+		return x;
+	}
+
+	/* little endian */
+	return ( ntohl( (x >> 32) & 0xFFFFFFFF ) | \
+			( (uint64_t) ntohl(x & 0xFFFFFFFF) ) << 32 );
+}
+
+/** Convert a 64-bit value between host byte order and network byte order. */
+uint64_t htonll(uint64_t x)
+{
+	static int endianness = 0;
+
+	/* determine the endianness by checking how htonl() behaves; use -1
+	 * for little endian and 1 for big endian */
+	if (endianness == 0) {
+		if (htonl(1) == 1)
+			endianness = 1;
+		else
+			endianness = -1;
+	}
+
+	if (endianness == 1) {
+		/* big endian */
+		return x;
+	}
+
+	/* little endian */
+	return ( htonl( (x >> 32) & 0xFFFFFFFF ) | \
+			( (uint64_t) htonl(x & 0xFFFFFFFF) ) << 32 );
+}
+
+
diff --git a/libjio/common.h b/libjio/common.h
index 4879eb5..71ccc49 100644
--- a/libjio/common.h
+++ b/libjio/common.h
@@ -8,6 +8,7 @@
 
 #include <sys/types.h>	/* for ssize_t and off_t */
 #include <stdint.h>	/* for uint*_t */
+#include <sys/uio.h>	/* for struct iovec */
 
 #include "libjio.h"	/* for struct jfs */
 #include "fiu-local.h"	/* for fault injection functions */
@@ -72,11 +73,13 @@ struct jfs {
 off_t plockf(int fd, int cmd, off_t offset, off_t len);
 ssize_t spread(int fd, void *buf, size_t count, off_t offset);
 ssize_t spwrite(int fd, const void *buf, size_t count, off_t offset);
+ssize_t swritev(int fd, struct iovec *iov, int iovcnt);
 int get_jdir(const char *filename, char *jdir);
 void get_jtfile(struct jfs *fs, unsigned int tid, char *jtfile);
+uint64_t ntohll(uint64_t x);
+uint64_t htonll(uint64_t x);
 
-int checksum(int fd, size_t len, uint32_t *csum);
-uint32_t checksum_map(uint8_t *map, size_t count);
+uint32_t checksum_buf(uint32_t sum, const unsigned char *buf, size_t count);
 
 void autosync_check(struct jfs *fs);
 
diff --git a/libjio/compat.c b/libjio/compat.c
index 3ad4463..78e14f0 100644
--- a/libjio/compat.c
+++ b/libjio/compat.c
@@ -3,64 +3,53 @@
  * Compatibility functions
  */
 
-/* To get sync_file_range() we need to temporarily define _GNU_SOURCE, which
- * is not the nicest thing, but is not worth defining globally */
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#define _REMOVE_GNU_SOURCE
-#endif
-
-/* Must be down here because otherwise we might try to #include things twice:
- * once with _GNU_SOURCE and one without it */
 #include "compat.h"
+#include <sys/types.h>		/* off_t, size_t */
+#include <unistd.h>		/* fdatasync(), if available */
 
 
 /*
  * sync_file_range() support through an internal similar API
  */
 
-#include <fcntl.h>		/* sync_range_submit(), if possible */
-#include <sys/types.h>		/* off_t, size_t */
-
-#ifdef SYNC_FILE_RANGE_WRITE
-const int have_sync_range = 1;
+#ifdef LACK_SYNC_FILE_RANGE
+#warning "Using fdatasync() instead of sync_file_range()"
+const int have_sync_range = 0;
 
-/** Initiate write-out of the dirty pages in the range */
 int sync_range_submit(int fd, off_t offset, size_t nbytes)
 {
-	/* We don't need SYNC_FILE_RANGE_WAIT_BEFORE because we have exclusive
-	 * access to the range (guaranteed by the caller) */
-	return sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WRITE);
+	return 0;
 }
 
-/** Wait for completion of the previously-submitted I/O in the given ranges.
- * Does NOT force the submission of any new I/O. */
 int sync_range_wait(int fd, off_t offset, size_t nbytes)
 {
-	return sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WAIT_BEFORE);
+	/* fdatasync() waits for the submitted I/O to complete, so it's enough
+	 * to call it once here */
+	return fdatasync(fd);
 }
 
 #else
 
-#warning "Using fdatasync() instead of sync_file_range()"
-const int have_sync_range = 0;
+/** Indicates whether we have a full implementation of sync_range_submit() and
+ * sync_range_wait(), so we can take advantage of it. */
+const int have_sync_range = 1;
 
+/** Initiate write-out of the dirty pages in the range */
 int sync_range_submit(int fd, off_t offset, size_t nbytes)
 {
-	return -1;
+	/* We don't need SYNC_FILE_RANGE_WAIT_BEFORE because we have exclusive
+	 * access to the range (guaranteed by the caller) */
+	return sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WRITE);
 }
 
+/** Wait for completion of the previously-submitted I/O in the given ranges.
+ * Does NOT force the submission of any new I/O. */
 int sync_range_wait(int fd, off_t offset, size_t nbytes)
 {
-	return -1;
+	return sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WAIT_BEFORE);
 }
 
-#endif /* defined SYNC_FILE_RANGE_WRITE */
-
-/* It is no longer needed */
-#ifdef _REMOVE_GNU_SOURCE
-#undef _GNU_SOURCE
-#endif
+#endif /* defined LACK_SYNC_FILE_RANGE */
 
 
 /* When posix_fadvise() is not available, we just show a message since there
@@ -93,6 +82,7 @@ int clock_gettime(int clk_id, struct timespec *tp)
 
 #endif /* defined LACK_CLOCK_GETTIME */
 
+
 #ifdef LACK_FDATASYNC
 #warning "Using fsync() instead of fdatasync()"
 
diff --git a/libjio/compat.h b/libjio/compat.h
index cceeb29..c33baee 100644
--- a/libjio/compat.h
+++ b/libjio/compat.h
@@ -4,12 +4,28 @@
 #ifndef _COMPAT_H
 #define _COMPAT_H
 
-#include <sys/types.h>		/* off_t, size_t */
-
 
 /* sync_file_range() is linux-specific, so we provide an internal similar API,
  * with a constant to be able to check for its presence; the implementation is
- * in compat.c */
+ * in compat.c.
+ *
+ * To get its constants we need to temporarily define _GNU_SOURCE, which is
+ * not the nicest thing, but is not worth defining globally. */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#define _REMOVE_GNU_SOURCE
+#endif
+#include <fcntl.h>		/* SYNC_FILE_RANGE_WRITE, if available */
+#ifdef _REMOVE_GNU_SOURCE
+#undef _REMOVE_GNU_SOURCE
+#undef _GNU_SOURCE
+#endif
+
+#ifndef SYNC_FILE_RANGE_WRITE
+#define LACK_SYNC_FILE_RANGE 1
+#endif
+
+#include <sys/types.h>		/* off_t, size_t */
 extern const int have_sync_range;
 int sync_range_submit(int fd, off_t offset, size_t nbytes);
 int sync_range_wait(int fd, off_t offset, size_t nbytes);
@@ -24,6 +40,7 @@ int sync_range_wait(int fd, off_t offset, size_t nbytes);
 #include <fcntl.h>
 #ifndef POSIX_FADV_WILLNEED
 #define LACK_POSIX_FADVISE 1
+#define POSIX_FADV_WILLNEED 0
 #define posix_fadvise(fd, offset, len, advise)
 #endif
 
@@ -45,6 +62,7 @@ int fdatasync(int fd);
 #ifndef CLOCK_REALTIME
 #define LACK_CLOCK_GETTIME 1
 #define CLOCK_REALTIME 0
+int clock_gettime(int clk_id, struct timespec *tp);
 #endif
 
 #endif
diff --git a/libjio/doxygen/Doxyfile.base b/libjio/doxygen/Doxyfile.base.in
similarity index 99%
rename from libjio/doxygen/Doxyfile.base
rename to libjio/doxygen/Doxyfile.base.in
index aae43eb..3bfcf8f 100644
--- a/libjio/doxygen/Doxyfile.base
+++ b/libjio/doxygen/Doxyfile.base.in
@@ -1,6 +1,6 @@
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = libjio
-PROJECT_NUMBER         = 0.51
+PROJECT_NUMBER         = ++VERSION++
 OUTPUT_DIRECTORY       = 
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
diff --git a/libjio/doxygen/Makefile b/libjio/doxygen/Makefile
index 3b020d0..d69197f 100644
--- a/libjio/doxygen/Makefile
+++ b/libjio/doxygen/Makefile
@@ -10,14 +10,23 @@ default: all
 
 all: public internal
 
-public:
+# $(LIB_VER) must be defined externally if we want the generated docs to
+# specify a version number. Usually, this Makefile will be invoked by
+# libjio's, which has that variable properly defined.
+Doxyfile.base: Doxyfile.base.in
+	@echo "generating Doxyfile.base"
+	@cat Doxyfile.base.in | \
+		sed 's@++VERSION++@$(LIB_VER)@g' \
+		> Doxyfile.base
+
+public: Doxyfile.base
 	$(NICE_DOXYGEN) Doxyfile.public
 
-internal:
+internal: Doxyfile.base
 	$(NICE_DOXYGEN) Doxyfile.internal
 
 clean:
-	rm -rf doc.internal doc.public
+	rm -rf doc.internal doc.public Doxyfile.base
 
 
 .PHONY: all clean default doxygen internal public
diff --git a/libjio/jiofsck.c b/libjio/jiofsck.c
index 8cfe00f..217cd54 100644
--- a/libjio/jiofsck.c
+++ b/libjio/jiofsck.c
@@ -59,24 +59,39 @@ int main(int argc, char **argv)
 	memset(&res, 0, sizeof(res));
 
 	flags = 0;
-	if (!do_cleanup)
-		flags |= J_NOCLEANUP;
+	if (do_cleanup)
+		flags |= J_CLEANUP;
 
 	printf("Checking journal: ");
 	fflush(stdout);
 	rv = jfsck(file, jdir, &res, flags);
 
-	if (rv == J_ENOENT) {
+	switch (rv) {
+	case J_ESUCCESS:
+		printf("done\n");
+		break;
+	case J_ENOENT:
 		printf("No such file or directory\n");
 		return 1;
-	} else if (rv == J_ENOJOURNAL) {
+	case J_ENOJOURNAL:
 		printf("No journal associated to the file, "
 				"or journal empty\n");
 		return 1;
+	case J_ENOMEM:
+		printf("Not enough memory\n");
+		return 1;
+	case J_ECLEANUP:
+		printf("Error cleaning up the journal directory\n");
+		return 1;
+	case J_EIO:
+		printf("I/O error\n");
+		perror("  additional information");
+		return 1;
+	default:
+		printf("Unknown result, please report as a bug\n");
+		return 1;
 	}
 
-	printf("done\n");
-
 	printf("Journal checking results\n");
 	printf("------------------------\n\n");
 
@@ -85,15 +100,11 @@ int main(int argc, char **argv)
 	printf("In progress:\t %d\n", res.in_progress);
 	printf("Broken:\t\t %d\n", res.broken);
 	printf("Corrupt:\t %d\n", res.corrupt);
-	printf("Apply error:\t %d\n", res.apply_error);
 	printf("Reapplied:\t %d\n", res.reapplied);
 	printf("\n");
 
-	if (!do_cleanup) {
-		printf("You can now safely remove the journal directory "
-				"completely\nto start a new journal.\n");
-	} else {
-		printf("The journal has been checked and cleaned up.\n");
+	if (do_cleanup) {
+		printf("The journal has been cleaned up.\n");
 	}
 
 	return 0;
diff --git a/libjio/journal.c b/libjio/journal.c
index b059eee..7c8d9bf 100644
--- a/libjio/journal.c
+++ b/libjio/journal.c
@@ -8,13 +8,13 @@
 #include <fcntl.h>		/* open() */
 #include <unistd.h>		/* f[data]sync(), close() */
 #include <stdlib.h>		/* malloc() and friends */
-#include <limits.h>		/* MAX_PATH */
+#include <limits.h>		/* PATH_MAX */
 #include <string.h>		/* memcpy() */
-#include <libgen.h>		/* basename(), dirname() */
 #include <stdio.h>		/* fprintf() */
-#include <dirent.h>		/* readdir() and friends */
 #include <errno.h>		/* errno */
-#include <sys/mman.h>		/* mmap() */
+#include <stdint.h>		/* uintX_t */
+#include <arpa/inet.h>		/* htonl() and friends */
+#include <netinet/in.h>		/* htonl() and friends (on some platforms) */
 
 #include "libjio.h"
 #include "common.h"
@@ -24,7 +24,87 @@
 
 
 /*
- * helper functions
+ * On-disk structures
+ *
+ * Each transaction will be stored on disk as a single file, composed of a
+ * header, operation information, and a trailer. The operation information is
+ * composed of repeated operation headers followed by their corresponding
+ * data, one for each operation. A special operation header containing all 0s
+ * marks the end of the operations.
+ * 
+ * Visually, something like this:
+ * 
+ *  +--------+---------+----------+---------+----------+-----+-----+---------+
+ *  | header | op1 hdr | op1 data | op2 hdr | op2 data | ... | eoo | trailer |
+ *  +--------+---------+----------+---------+----------+-----+-----+---------+
+ *             \                                             /
+ *              +--------------- operations ----------------+ 
+ *
+ * The details of each part can be seen on the following structures. All
+ * integers are stored in network byte order.
+ */
+
+/** Transaction file header */
+struct on_disk_hdr {
+	uint16_t ver;
+	uint16_t flags;
+	uint32_t trans_id;
+} __attribute__((packed));
+
+/** Transaction file operation header */
+struct on_disk_ophdr {
+	uint32_t len;
+	uint64_t offset;
+} __attribute__((packed));
+
+/** Transaction file trailer */
+struct on_disk_trailer {
+	uint32_t numops;
+	uint32_t checksum;
+} __attribute__((packed));
+
+
+/* Convert structs to/from host to network (disk) endian */
+
+static void hdr_hton(struct on_disk_hdr *hdr)
+{
+	hdr->ver = htons(hdr->ver);
+	hdr->flags = htons(hdr->flags);
+	hdr->trans_id = htonl(hdr->trans_id);
+}
+
+static void hdr_ntoh(struct on_disk_hdr *hdr)
+{
+	hdr->ver = ntohs(hdr->ver);
+	hdr->flags = ntohs(hdr->flags);
+	hdr->trans_id = ntohl(hdr->trans_id);
+}
+
+static void ophdr_hton(struct on_disk_ophdr *ophdr)
+{
+	ophdr->len = htonl(ophdr->len);
+	ophdr->offset = htonll(ophdr->offset);
+}
+
+static void ophdr_ntoh(struct on_disk_ophdr *ophdr)
+{
+	ophdr->len = ntohl(ophdr->len);
+	ophdr->offset = ntohll(ophdr->offset);
+}
+
+static void trailer_hton(struct on_disk_trailer *trailer) {
+	trailer->numops = htonl(trailer->numops);
+	trailer->checksum = htonl(trailer->checksum);
+}
+
+static void trailer_ntoh(struct on_disk_trailer *trailer) {
+	trailer->numops = ntohl(trailer->numops);
+	trailer->checksum = ntohl(trailer->checksum);
+}
+
+
+/*
+ * Helper functions
  */
 
 /** Get a new transaction id */
@@ -118,6 +198,76 @@ static int fsync_dir(int fd)
 	return rv;
 }
 
+/** Corrupt a journal file. Used as a last resort to prevent an applied
+ * transaction file from lying around */
+static int corrupt_journal_file(struct journal_op *jop)
+{
+	off_t pos;
+	struct on_disk_trailer trailer;
+
+	/* We set the number of operations to 0, and the checksum to
+	 * 0xffffffff, so there is no chance it's considered valid after a new
+	 * transaction overwrites this one */
+	trailer.numops = 0;
+	trailer.checksum = 0xffffffff;
+
+	pos = lseek(jop->fd, 0, SEEK_END);
+	if (pos == (off_t) -1)
+		return -1;
+
+	if (pwrite(jop->fd, (unsigned char *) &trailer, sizeof(trailer), pos)
+			!= sizeof(trailer))
+		return -1;
+
+	if (fdatasync(jop->fd) != 0)
+		return -1;
+
+	return 0;
+}
+
+/** Mark the journal as broken by creating a file named "broken" inside the
+ * journal directory. Used internally on severe journal errors (like failing
+ * to remove transaction files) to prevent further journal use and potential
+ * corruption; jfsck() removes the mark. Returns 1 if created, 0 otherwise. */
+static int mark_broken(struct jfs *fs)
+{
+	char broken_path[PATH_MAX];
+	int fd;
+
+	snprintf(broken_path, PATH_MAX, "%s/broken", fs->jdir);
+	fd = creat(broken_path, 0600);
+	if (fd >= 0)
+		close(fd);
+	return fd >= 0;
+}
+
+/** Check if the journal is broken */
+static int is_broken(struct jfs *fs)
+{
+	char broken_path[PATH_MAX];
+
+	snprintf(broken_path, PATH_MAX, "%s/broken", fs->jdir);
+	return access(broken_path, F_OK) == 0;
+}
+
+/* Open and lock (exclusive) the given file name. Returns the file descriptor,
+ * or -1 on error. */
+static int open_and_lockw(const char *name, int flags, int mode)
+{
+	int fd;
+
+	fd = open(name, flags, mode);
+	if (fd < 0)
+		return -1;
+
+	if (plockf(fd, F_LOCKW, 0, 0) != 0) {
+		close(fd);
+		return -1;
+	}
+
+	return fd;
+}
+
 
 /*
  * Journal functions
@@ -125,14 +275,17 @@ static int fsync_dir(int fd)
 
 /** Create a new transaction in the journal. Returns a pointer to an opaque
  * jop_t (that is freed using journal_free), or NULL if there was an error. */
-struct journal_op *journal_new(struct jtrans *ts)
+struct journal_op *journal_new(struct jfs *fs, unsigned int flags)
 {
 	int fd, id;
 	ssize_t rv;
 	char *name = NULL;
-	unsigned char buf_init[J_DISKHEADSIZE];
-	unsigned char *bufp;
 	struct journal_op *jop = NULL;
+	struct on_disk_hdr hdr;
+	struct iovec iov[1];
+
+	if (is_broken(fs))
+		goto error;
 
 	jop = malloc(sizeof(struct journal_op));
 	if (jop == NULL)
@@ -142,49 +295,39 @@ struct journal_op *journal_new(struct jtrans *ts)
 	if (name == NULL)
 		goto error;
 
-	id = get_tid(ts->fs);
+	id = get_tid(fs);
 	if (id == 0)
 		goto error;
 
 	/* open the transaction file */
-	get_jtfile(ts->fs, id, name);
-	fd = open(name, O_RDWR | O_CREAT | O_TRUNC, 0600);
+	get_jtfile(fs, id, name);
+	fd = open_and_lockw(name, O_RDWR | O_CREAT | O_TRUNC, 0600);
 	if (fd < 0)
 		goto error;
 
 	jop->id = id;
 	jop->fd = fd;
+	jop->numops = 0;
 	jop->name = name;
-	jop->curpos = 0;
-	jop->ts = ts;
-	jop->fs = ts->fs;
+	jop->csum = 0;
+	jop->fs = fs;
 
 	fiu_exit_on("jio/commit/created_tf");
 
-	/* and lock it, just in case */
-	plockf(fd, F_LOCKW, 0, 0);
-
-	ts->id = id;
-
 	/* save the header */
-	bufp = buf_init;
-
-	memcpy(bufp, (void *) &(ts->id), 4);
-	bufp += 4;
-
-	memcpy(bufp, (void *) &(ts->flags), 4);
-	bufp += 4;
-
-	memcpy(bufp, (void *) &(ts->numops), 4);
-	bufp += 4;
-
-	rv = spwrite(fd, buf_init, J_DISKHEADSIZE, 0);
-	if (rv != J_DISKHEADSIZE) {
-		free(buf_init);
+	hdr.ver = 1;
+	hdr.trans_id = id;
+	hdr.flags = flags;
+	hdr_hton(&hdr);
+
+	iov[0].iov_base = (unsigned char *) &hdr;
+	iov[0].iov_len = sizeof(hdr);
+	rv = swritev(fd, iov, 1);
+	if (rv != sizeof(hdr))
 		goto unlink_error;
-	}
 
-	jop->curpos = J_DISKHEADSIZE;
+	jop->csum = checksum_buf(jop->csum, (unsigned char *) &hdr,
+			sizeof(hdr));
 
 	fiu_exit_on("jio/commit/tf_header");
 
@@ -192,7 +335,7 @@ struct journal_op *journal_new(struct jtrans *ts)
 
 unlink_error:
 	unlink(name);
-	free_tid(ts->fs, ts->id);
+	free_tid(fs, id);
 	close(fd);
 
 error:
@@ -202,81 +345,79 @@ error:
 	return NULL;
 }
 
-/** Save the given transaction in the journal */
-int journal_save(struct journal_op *jop)
+/** Save a single operation in the journal file */
+int journal_add_op(struct journal_op *jop, unsigned char *buf, size_t len,
+		off_t offset)
 {
 	ssize_t rv;
-	uint32_t csum;
-	struct joper *op;
-	unsigned char hdr[J_DISKOPHEADSIZE];
-	unsigned char *hdrp;
-	const struct jtrans *ts = jop->ts;
-
-	/* save each transacion in the file */
-	for (op = ts->op; op != NULL; op = op->next) {
-		/* read the current content only if the transaction is not
-		 * marked as NOROLLBACK, and if the data is not there yet,
-		 * which is the normal case, but for rollbacking we fill it
-		 * ourselves */
-		if (!(ts->flags & J_NOROLLBACK) && (op->pdata == NULL)) {
-			op->pdata = malloc(op->len);
-			if (op->pdata == NULL)
-				goto error;
-
-			op->plen = op->len;
-
-			rv = spread(ts->fs->fd, op->pdata, op->len,
-					op->offset);
-			if (rv < 0)
-				goto error;
-			if (rv < op->len) {
-				/* we are extending the file! */
-				/* ftruncate(ts->fs->fd, op->offset + op->len); */
-				op->plen = rv;
-			}
-		}
-
-		/* save the operation's header */
-		hdrp = hdr;
+	struct on_disk_ophdr ophdr;
+	struct iovec iov[2];
 
-		memcpy(hdrp, (void *) &(op->len), 4);
-		hdrp += 4;
+	ophdr.len = len;
+	ophdr.offset = offset;
+	ophdr_hton(&ophdr);
 
-		memcpy(hdrp, (void *) &(op->plen), 4);
-		hdrp += 4;
+	iov[0].iov_base = (unsigned char *) &ophdr;
+	iov[0].iov_len = sizeof(ophdr);
+	jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
+			sizeof(ophdr));
 
-		memcpy(hdrp, (void *) &(op->offset), 8);
-		hdrp += 8;
+	iov[1].iov_base = buf;
+	iov[1].iov_len = len;
+	jop->csum = checksum_buf(jop->csum, buf, len);
 
-		rv = spwrite(jop->fd, hdr, J_DISKOPHEADSIZE, jop->curpos);
-		if (rv != J_DISKOPHEADSIZE)
-			goto error;
-
-		fiu_exit_on("jio/commit/tf_ophdr");
+	fiu_exit_on("jio/commit/tf_pre_addop");
 
-		jop->curpos += J_DISKOPHEADSIZE;
+	rv = swritev(jop->fd, iov, 2);
+	if (rv != sizeof(ophdr) + len)
+		goto error;
 
-		/* and save it to the disk */
-		rv = spwrite(jop->fd, op->buf, op->len, jop->curpos);
-		if (rv != op->len)
-			goto error;
+	fiu_exit_on("jio/commit/tf_addop");
 
-		jop->curpos += op->len;
+	jop->numops++;
 
-		fiu_exit_on("jio/commit/tf_opdata");
-	}
+	return 0;
 
-	fiu_exit_on("jio/commit/tf_data");
+error:
+	return -1;
+}
 
-	/* compute and save the checksum (curpos is always small, so there's
-	 * no overflow possibility when we convert to size_t) */
-	if (!checksum(jop->fd, jop->curpos, &csum))
-		goto error;
+/** Prepares to commit the operation. Can be omitted. */
+void journal_pre_commit(struct journal_op *jop)
+{
+	/* In an attempt to reduce journal_commit() fsync() waiting time, we
+	 * submit the sync here, hoping that at least some of it will be ready
+	 * by the time we hit journal_commit() */
+	sync_range_submit(jop->fd, 0, 0);
+}
 
-	rv = spwrite(jop->fd, &csum, sizeof(uint32_t), jop->curpos);
-	if (rv != sizeof(uint32_t))
+/** Commit the journal operation */
+int journal_commit(struct journal_op *jop)
+{
+	ssize_t rv;
+	struct on_disk_ophdr ophdr;
+	struct on_disk_trailer trailer;
+	struct iovec iov[2];
+
+	/* write the empty ophdr to mark there are no more operations, and
+	 * then the trailer */
+	ophdr.len = 0;
+	ophdr.offset = 0;
+	ophdr_hton(&ophdr);
+	iov[0].iov_base = (unsigned char *) &ophdr;
+	iov[0].iov_len = sizeof(ophdr);
+	jop->csum = checksum_buf(jop->csum, (unsigned char *) &ophdr,
+			sizeof(ophdr));
+
+	trailer.checksum = jop->csum;
+	trailer.numops = jop->numops;
+	trailer_hton(&trailer);
+	iov[1].iov_base = (unsigned char *) &trailer;
+	iov[1].iov_len = sizeof(trailer);
+
+	rv = swritev(jop->fd, iov, 2);
+	if (rv != sizeof(ophdr) + sizeof(trailer))
 		goto error;
-	jop->curpos += sizeof(uint32_t);
 
 	/* this is a simple but efficient optimization: instead of doing
 	 * everything O_SYNC, we sync at this point only, this way we avoid
@@ -285,7 +426,7 @@ int journal_save(struct journal_op *jop)
 	 * point) so we only flush here (both data and metadata) */
 	if (fsync(jop->fd) != 0)
 		goto error;
-	if (fsync_dir(ts->fs->jdirfd) != 0)
+	if (fsync_dir(jop->fs->jdirfd) != 0)
 		goto error;
 
 	fiu_exit_on("jio/commit/tf_sync");
@@ -299,28 +440,34 @@ error:
 /** Free a journal operation.
  * NOTE: It can't assume the save completed successfuly, so we can call it
  * when journal_save() fails.  */
-int journal_free(struct journal_op *jop)
+int journal_free(struct journal_op *jop, int do_unlink)
 {
 	int rv;
 
+	if (!do_unlink) {
+		rv = 0;
+		goto exit;
+	}
+
 	rv = -1;
 
 	if (unlink(jop->name)) {
 		/* we do not want to leave a possibly complete transaction
 		 * file around when the transaction was not commited and the
 		 * unlink failed, so we attempt to truncate it, and if that
-		 * fails we corrupt the checksum as a last resort */
+		 * fails we corrupt it as a last resort. */
 		if (ftruncate(jop->fd, 0) != 0) {
-			if (pwrite(jop->fd, "\0\0\0\0", 4, jop->curpos - 4)
-					!= 4)
-				goto exit;
-			if (fdatasync(jop->fd) != 0)
+			if (corrupt_journal_file(jop) != 0) {
+				mark_broken(jop->fs);
 				goto exit;
+			}
 		}
 	}
 
-	if (fsync_dir(jop->fs->jdirfd) != 0)
+	if (fsync_dir(jop->fs->jdirfd) != 0) {
+		mark_broken(jop->fs);
 		goto exit;
+	}
 
 	fiu_exit_on("jio/commit/pre_ok_free_tid");
 	free_tid(jop->fs, jop->id);
@@ -330,12 +477,116 @@ int journal_free(struct journal_op *jop)
 exit:
 	close(jop->fd);
 
-	if (jop->name)
-		free(jop->name);
-
+	free(jop->name);
 	free(jop);
 
 	return rv;
 }
 
+/** Fill a transaction structure from a mmapped transaction file. Useful for
+ * checking purposes.
+ * @returns 0 on success, -1 if the file was broken, -2 if the checksums didn't
+ *	match
+ */
+int fill_trans(unsigned char *map, off_t len, struct jtrans *ts)
+{
+	int rv;
+	unsigned char *p;
+	struct operation *op, *tmp;
+	struct on_disk_hdr hdr;
+	struct on_disk_ophdr ophdr;
+	struct on_disk_trailer trailer;
+
+	rv = -1;
+
+	if (len < sizeof(hdr) + sizeof(ophdr) + sizeof(trailer))
+		return -1;
+
+	p = map;
+
+	memcpy(&hdr, p, sizeof(hdr));
+	p += sizeof(hdr);
+
+	hdr_ntoh(&hdr);
+	if (hdr.ver != 1)
+		return -1;
+
+	ts->id = hdr.trans_id;
+	ts->flags = hdr.flags;
+	ts->numops_r = 0;
+	ts->numops_w = 0;
+	ts->len_w = 0;
+
+	for (;;) {
+		if (p + sizeof(ophdr) > map + len)
+			goto error;
+
+		memcpy(&ophdr, p,  sizeof(ophdr));
+		p += sizeof(ophdr);
+
+		ophdr_ntoh(&ophdr);
+
+		if (ophdr.len == 0 && ophdr.offset == 0) {
+			/* This header marks the end of the operations */
+			break;
+		}
+
+		if (p + ophdr.len > map + len)
+			goto error;
+
+		op = malloc(sizeof(struct operation));
+		if (op == NULL)
+			goto error;
+
+		op->len = ophdr.len;
+		op->offset = ophdr.offset;
+		op->direction = D_WRITE;
+
+		op->buf = (void *) p;
+		p += op->len;
+
+		op->pdata = NULL;
+
+		if (ts->op == NULL) {
+			ts->op = op;
+			op->prev = NULL;
+			op->next = NULL;
+		} else {
+			for (tmp = ts->op; tmp->next != NULL; tmp = tmp->next)
+				;
+			tmp->next = op;
+			op->prev = tmp;
+			op->next = NULL;
+		}
+
+		ts->numops_w++;
+		ts->len_w += op->len;
+	}
+
+	if (p + sizeof(trailer) > map + len)
+		goto error;
+
+	memcpy(&trailer, p, sizeof(trailer));
+	p += sizeof(trailer);
+
+	trailer_ntoh(&trailer);
+
+	if (trailer.numops != ts->numops_w)
+		goto error;
+
+	if (checksum_buf(0, map, len - sizeof(trailer)) != trailer.checksum) {
+		rv = -2;
+		goto error;
+	}
+
+	return 0;
+
+error:
+	while (ts->op != NULL) {
+		tmp = ts->op->next;
+		free(ts->op);
+		ts->op = tmp;
+	}
+	return rv;
+}
 
diff --git a/libjio/journal.h b/libjio/journal.h
index 5a4666c..bdc1445 100644
--- a/libjio/journal.h
+++ b/libjio/journal.h
@@ -2,23 +2,29 @@
 #ifndef _JOURNAL_H
 #define _JOURNAL_H
 
+#include <stdint.h>
 #include "libjio.h"
 
 
 struct journal_op {
 	int id;
 	int fd;
+	int numops;
 	char *name;
-	off_t curpos;
-	struct jtrans *ts;
+	uint32_t csum;
 	struct jfs *fs;
 };
 
 typedef struct journal_op jop_t;
 
-struct journal_op *journal_new(struct jtrans *ts);
-int journal_save(struct journal_op *jop);
-int journal_free(struct journal_op *jop);
+struct journal_op *journal_new(struct jfs *fs, unsigned int flags);
+int journal_add_op(struct journal_op *jop, unsigned char *buf, size_t len,
+		off_t offset);
+void journal_pre_commit(struct journal_op *jop);
+int journal_commit(struct journal_op *jop);
+int journal_free(struct journal_op *jop, int do_unlink);
+
+int fill_trans(unsigned char *map, off_t len, struct jtrans *ts);
 
 #endif
 
diff --git a/libjio/libjio.3 b/libjio/libjio.3
index 4de5881..6e946fb 100644
--- a/libjio/libjio.3
+++ b/libjio/libjio.3
@@ -6,7 +6,7 @@ libjio - A library for Journaled I/O
 .B #include <libjio.h>
 
 .BI "jfs_t *jopen(const char *" name ", int " flags ", int " mode ",
-.BI "           int " jflags ");"
+.BI "           unsigned int " jflags ");"
 .BI "ssize_t jread(jfs_t *" fs ", void *" buf ", size_t " count ");"
 .BI "ssize_t jpread(jfs_t *" fs ", void *" buf ", size_t " count ","
 .BI "		off_t " offset ");"
@@ -21,9 +21,11 @@ libjio - A library for Journaled I/O
 .BI "off_t jlseek(jfs_t *" fs ", off_t " offset ", int " whence ");"
 .BI "int jclose(jfs_t *" fs ");"
 
-.BI "jtrans_t *jtrans_new(jfs_t *" fs ");"
+.BI "jtrans_t *jtrans_new(jfs_t *" fs ", unsigned int " flags ");"
 .BI "int jtrans_commit(jtrans_t *" ts ");"
-.BI "int jtrans_add(jtrans_t *" ts ", const void *" buf ","
+.BI "int jtrans_add_r(jtrans_t *" ts ", void *" buf ","
+.BI "		size_t " count ", off_t " offset ");"
+.BI "int jtrans_add_w(jtrans_t *" ts ", const void *" buf ","
 .BI "		size_t " count ", off_t " offset ");"
 .BI "int jtrans_rollback(jtrans_t *" ts ");"
 .BI "void jtrans_free(jtrans_t *" ts ");"
@@ -42,7 +44,6 @@ libjio - A library for Journaled I/O
     int invalid;          /* invalid files in the journal directory */
     int in_progress;      /* transactions in progress */
     int broken;           /* transactions broken */
-    int apply_error;      /* errors applying the transaction */
     int rollbacked;       /* transactions that were rollbacked */
     ...
 };
@@ -53,6 +54,7 @@ libjio - A library for Journaled I/O
     J_ENOJOURNAL = -2,	/* No journal associated with the given file */
     J_ENOMEM = -3,	/* Not enough free memory */
     J_ECLEANUP = -4,	/* Error cleaning the journal directory */
+    J_EIO = -5,		/* I/O error */
 };
 
 
@@ -79,8 +81,8 @@ interfaces to
 and friends.
 
 The basic functions consist of
-.BR jtrans_new() ", " jtrans_add() ", " jtrans_commit() " and "
-.BR jtrans_rollback() .
+.BR jtrans_new() ", " jtrans_add_r() ", " jtrans_add_w() ", "
+.BR jtrans_commit() " and " jtrans_rollback() .
 They provide a lower-level method for manipulating transactions.
 
 .SS TYPES AND STRUCTURES
@@ -135,8 +137,8 @@ to the journal directory (usually NULL for the default, unless you've changed
 it manually using
 .BR jmove_journal() ),
 and optionally a flags parameter, which can be 0 for the default behaviour, or
-J_NOCLEANUP to indicate that the journal should not be cleaned up after
-successful recovery.
+J_CLEANUP to indicate that the journal should be cleaned up after successful
+recovery.
 
 It is used to perform journal checking and recovery in case of a crash. It
 must be performed when nobody else is using the file (like in the case of a
@@ -146,11 +148,13 @@ summarizing the outcome of the operation. The error codes can be either
 .I J_ENOENT
 (no such file),
 .I J_ENOJOURNAL
-(no journal associated with that file) or
+(no journal associated with that file),
 .I J_ENOMEM
-(not enough free memory), and
+(not enough free memory),
 .I J_ECLEANUP
-(error cleaning the journal directory). There is also a program named
+(error cleaning the journal directory), and
+.I J_EIO
+(I/O error). There is also a program named
 .I jiofsck
 which is just a simple human frontend to this function.
 
@@ -172,9 +176,8 @@ semantics and behave the same way.
 .SS BASIC FUNCTIONS
 
 The basic functions are the ones which manipulate transactions directly:
-.BR jtrans_new() ", " jtrans_add() ", " jtrans_commit() ", " jtrans_rollback()
-and
-.BR jtrans_free() .
+.BR jtrans_new() ", " jtrans_add_r() ", " jtrans_add_w() ", "
+.BR jtrans_commit() ", " jtrans_rollback() " and " jtrans_free() .
 These are intended to be use when your application requires direct control
 over the transactions.
 
@@ -188,22 +191,34 @@ is not a disk operation, but only frees the pointers that were previously
 allocated by the library; all disk operations are performed by the other two
 functions.
 
-.B jtrans_add()
-is used to add operations to a transaction, and it takes the same parameters
-as
+You can add multiple read and write operations to a transaction, and they will
+be applied in order.
+
+.B jtrans_add_w()
+is used to add write operations to a transaction, and it takes the same
+parameters as
 .BR pwrite() :
 a buffer, its length and the offset where it should be applied, and adds it to
-the transaction. You can add multiple operations to a transaction, and they
-will be applied in order.
+the transaction. The buffer is copied internally and can be free()d right
+after this function returns.
+
+.B jtrans_add_r()
+is used to add read operations to a transaction, and it takes the same
+parameters as
+.BR pread() :
+a buffer, its length and the offset where it should read from, and adds it to
+the transaction. Note that if there is not enough data in the file to read
+the specified amount of bytes, the commit will fail, so do not attempt to read
+beyond EOF (you can use jread() for that purpose).
 
 .B jtrans_commit()
-commits the given transaction to disk. After it has returned, data has been
-saved to the disk. The commit operation is atomic with regards to other read
-or write operations on different processes, as long as they all access it via
-libjio. It returns the number of bytes written, -1 if there was an error but
-atomic warantees were preserved, or -2 if there was an error and there is a
-possible break of atomic warantees (which is an indication of a severe
-underlying condition).
+commits the given transaction to disk. After it has returned, write operations
+have been saved to the disk, and read operations have been read from it. The
+commit operation is atomic with regards to other read or write operations on
+different processes, as long as they all access it via libjio. It returns 0 on
+success, -1 if there was an error but atomic guarantees were preserved, or -2
+if there was an error and there is a possible break of atomic guarantees
+(which is an indication of a severe underlying condition).
 
 .B jtrans_rollback()
 reverses a transaction that was applied with
diff --git a/libjio/libjio.h b/libjio/libjio.h
index 4421d25..0476c6e 100644
--- a/libjio/libjio.h
+++ b/libjio/libjio.h
@@ -59,9 +59,6 @@ struct jfsck_result {
 	/** Number of corrupt transactions */
 	int corrupt;
 
-	/** Number of errors applying transactions */
-	int apply_error;
-
 	/** Number of transactions successfully reapplied */
 	int reapplied;
 };
@@ -86,6 +83,9 @@ enum jfsck_return {
 
 	/** Error cleaning the journal directory */
 	J_ECLEANUP = -4,
+
+	/** I/O error */
+	J_EIO = -5,
 };
 
 
@@ -111,7 +111,7 @@ enum jfsck_return {
  * @see jclose(), open()
  * @ingroup basic
  */
-jfs_t *jopen(const char *name, int flags, int mode, int jflags);
+jfs_t *jopen(const char *name, int flags, int mode, unsigned int jflags);
 
 /** Close a file opened with jopen().
  *
@@ -137,17 +137,21 @@ int jsync(jfs_t *fs);
 
 /** Create a new transaction.
  *
+ * Note that the final flags to use in the transaction will be the result of
+ * ORing the flags parameter with fs' flags.
+ *
  * @param fs open file the transaction will apply to
+ * @param flags transaction flags
  * @returns a new transaction (must be freed using jtrans_free())
  * @see jtrans_free()
  * @ingroup basic
  */
-jtrans_t *jtrans_new(jfs_t *fs);
+jtrans_t *jtrans_new(jfs_t *fs, unsigned int flags);
 
-/** Add an operation to a transaction.
+/** Add a write operation to a transaction.
  *
- * An operation consists of a buffer, its length, and the offset to write it
- * to.
+ * A write operation consists of a buffer, its length, and the offset to write
+ * it to.
  *
  * The file will not be touched (not even locked) until commit time, where the
  * first count bytes of buf will be written at offset.
@@ -155,6 +159,9 @@ jtrans_t *jtrans_new(jfs_t *fs);
  * Transactions will be applied in order, and overlapping operations are
  * permitted, in which case the latest one will prevail.
  *
+ * The buffer will be copied internally and can be free()d right after this
+ * function returns.
+ *
  * @param ts transaction
  * @param buf buffer to write
  * @param count how many bytes from the buffer to write
@@ -162,22 +169,50 @@ jtrans_t *jtrans_new(jfs_t *fs);
  * @returns 0 on success, -1 on error
  * @ingroup basic
  */
-int jtrans_add(jtrans_t *ts, const void *buf, size_t count, off_t offset);
+int jtrans_add_w(jtrans_t *ts, const void *buf, size_t count, off_t offset);
+
+/** Add a read operation to a transaction.
+ *
+ * A read operation consists of a buffer, its length, and the offset to read it
+ * from.
+ *
+ * The file will not be touched (not even locked) until commit time, where the
+ * first count bytes at offset will be read into buf.
+ *
+ * Note that if there is not enough data in the file to read the specified
+ * amount of bytes, the commit will fail, so do not attempt to read beyond EOF
+ * (you can use jread() for that purpose).
+ *
+ * Operations will be applied in order, and overlapping operations are
+ * permitted, in which case the latest one will prevail.
+ *
+ * In case of an error in jtrans_commit(), the contents of the buffer are
+ * undefined.
+ *
+ * @param ts transaction
+ * @param buf buffer to read to
+ * @param count how many bytes to read
+ * @param offset offset to read at
+ * @returns 0 on success, -1 on error
+ * @ingroup basic
+ * @see jread()
+ */
+int jtrans_add_r(jtrans_t *ts, void *buf, size_t count, off_t offset);
 
 /** Commit a transaction.
  * 
- * All the operations added to it using jtrans_add() will be written to disk,
- * in the same order they were added.
+ * All the operations added to it using jtrans_add_w()/jtrans_add_r() will be
+ * written to/read from disk, in the same order they were added.
  *
  * After this function returns successfully, all the data can be trusted to be
  * on the disk. The commit is atomic with regards to other processes using
  * libjio, but not accessing directly to the file.
  *
  * @param ts transaction
- * @returns the amount of bytes written to disk, or -1 if there was an error
- *	but atomic warranties were preserved, or -2 if there was an error and
- *	there is a possible break of atomic warranties (which is an indication
- *	of a severe underlying condition).
+ * @returns 0 on success, or -1 if there was an error but atomic warranties
+ * 	were preserved, or -2 if there was an error and there is a possible
+ * 	break of atomic warranties (which is an indication of a severe
+ * 	underlying condition).
  * @ingroup basic
  */
 ssize_t jtrans_commit(jtrans_t *ts);
@@ -185,7 +220,8 @@ ssize_t jtrans_commit(jtrans_t *ts);
 /** Rollback a transaction.
  *
  * This function atomically undoes a previous committed transaction. After its
- * successful return, the data can be trusted to be on disk.
+ * successful return, the data can be trusted to be on disk. The read
+ * operations will be ignored.
  *
  * Use with care.
  *
@@ -259,14 +295,14 @@ int jfs_autosync_stop(jfs_t *fs);
  * @param jdir journal directory of the given file, use NULL for the default
  * @param res structure where to store the result
  * @param flags flags that change the checking behaviour, currently only
- *	J_NOCLEANUP is supported, which avoids cleaning up the journal
- *	directory after a successful recovery
+ *	J_CLEANUP is supported, which removes the journal directory after a
+ *	successful recovery
  * @see struct jfsck_result
  * @returns 0 on success, < 0 on error, with the following possible negative
  * 	values from enum jfsck_return: J_ENOENT if there was no such file with
  * 	the given name, J_ENOJOURNAL if there was no journal at the given
  * 	jdir, J_ENOMEM if memory could not be allocated, J_ECLEANUP if there
- * 	was an error cleaning the journal.
+ * 	was an error cleaning the journal, J_EIO if there was an I/O error.
  * @ingroup check
  */
 enum jfsck_return jfsck(const char *name, const char *jdir,
@@ -458,11 +494,11 @@ FILE *jfsopen(jfs_t *stream, const char *mode);
  * jfsck() flags
  */
 
-/** Do not perform a journal cleanup. Used in jfsck().
+/** Perform a journal cleanup. Used in jfsck().
  *
  * @see jfsck()
  * @ingroup check */
-#define J_NOCLEANUP	1
+#define J_CLEANUP	1
 
 #endif
 
diff --git a/libjio/libjio.skel.pc b/libjio/libjio.pc.in
similarity index 91%
rename from libjio/libjio.skel.pc
rename to libjio/libjio.pc.in
index 79f0b99..4e5adc2 100644
--- a/libjio/libjio.skel.pc
+++ b/libjio/libjio.pc.in
@@ -6,7 +6,7 @@ includedir=${prefix}/include
 Name: libjio
 Description: A library for Journaled I/O
 URL: http://blitiri.com.ar/p/libjio/
-Version: 0.51
+Version: ++VERSION++
 Libs: -L${libdir} -ljio
 Cflags: -I${includedir} ++CFLAGS++
 
diff --git a/libjio/trans.c b/libjio/trans.c
index 07d7bfe..91970d7 100644
--- a/libjio/trans.c
+++ b/libjio/trans.c
@@ -28,7 +28,7 @@
  */
 
 /* Initialize a transaction structure */
-struct jtrans *jtrans_new(struct jfs *fs)
+struct jtrans *jtrans_new(struct jfs *fs, unsigned int flags)
 {
 	pthread_mutexattr_t attr;
 	struct jtrans *ts;
@@ -39,13 +39,15 @@ struct jtrans *jtrans_new(struct jfs *fs)
 
 	ts->fs = fs;
 	ts->id = 0;
-	ts->flags = fs->flags;
+	ts->flags = fs->flags | flags;
 	ts->op = NULL;
-	ts->numops = 0;
-	ts->len = 0;
+	ts->numops_r = 0;
+	ts->numops_w = 0;
+	ts->len_w = 0;
+
 	pthread_mutexattr_init(&attr);
 	pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_NORMAL);
-	pthread_mutex_init( &(ts->lock), &attr);
+	pthread_mutex_init(&(ts->lock), &attr);
 	pthread_mutexattr_destroy(&attr);
 
 	return ts;
@@ -54,14 +56,14 @@ struct jtrans *jtrans_new(struct jfs *fs)
 /* Free the contents of a transaction structure */
 void jtrans_free(struct jtrans *ts)
 {
-	struct joper *tmpop;
+	struct operation *tmpop;
 
 	ts->fs = NULL;
 
 	while (ts->op != NULL) {
 		tmpop = ts->op->next;
 
-		if (ts->op->buf)
+		if (ts->op->buf && ts->op->direction == D_WRITE)
 			free(ts->op->buf);
 		if (ts->op->pdata)
 			free(ts->op->pdata);
@@ -74,88 +76,173 @@ void jtrans_free(struct jtrans *ts)
 	free(ts);
 }
 
-/* Add an operation to a transaction */
-int jtrans_add(struct jtrans *ts, const void *buf, size_t count, off_t offset)
+/** Lock/unlock the ranges of the file covered by the transaction. mode must
+ * be either F_LOCKW or F_UNLOCK. Returns 0 on success, -1 on error. */
+static int lock_file_ranges(struct jtrans *ts, int mode)
 {
-	struct joper *jop, *tmpop;
+	off_t lr;
+	struct operation *op;
 
-	pthread_mutex_lock(&(ts->lock));
+	if (ts->flags & J_NOLOCK)
+		return 0;
 
-	/* fail for read-only accesses */
-	if (ts->flags & J_RDONLY) {
-		pthread_mutex_unlock(&(ts->lock));
-		return -1;
+	for (op = ts->op; op != NULL; op = op->next) {
+		if (mode == F_LOCKW) {
+			lr = plockf(ts->fs->fd, F_LOCKW, op->offset, op->len);
+			if (lr == -1)
+				goto error;
+			op->locked = 1;
+		} else if (mode == F_UNLOCK && op->locked) {
+			lr = plockf(ts->fs->fd, F_UNLOCK, op->offset,
+					op->len);
+			if (lr == -1)
+				goto error;
+			op->locked = 0;
+		}
 	}
 
-	if ((long long) ts->len + count > MAX_TSIZE) {
-		pthread_mutex_unlock(&(ts->lock));
+	return 0;
+
+error:
+	return -1;
+}
+
+/** Read the previous information from the disk into the given operation
+ * structure. Returns 0 on success, -1 on error. */
+static int operation_read_prev(struct jtrans *ts, struct operation *op)
+{
+	ssize_t rv;
+
+	op->pdata = malloc(op->len);
+	if (op->pdata == NULL)
 		return -1;
+
+	rv = spread(ts->fs->fd, op->pdata, op->len,
+			op->offset);
+	if (rv < 0) {
+		free(op->pdata);
+		op->pdata = NULL;
+		return -1;
+	}
+
+	op->plen = op->len;
+	if (rv < op->len) {
+		/* we are extending the file! */
+		/* ftruncate(ts->fs->fd, op->offset + op->len); */
+		op->plen = rv;
+	}
+
+	return 0;
+}
+
+/** Common function to add an operation to a transaction */
+static int jtrans_add_common(struct jtrans *ts, const void *buf, size_t count,
+		off_t offset, enum op_direction direction)
+{
+	struct operation *op, *tmpop;
+
+	op = tmpop = NULL;
+
+	pthread_mutex_lock(&(ts->lock));
+
+	/* Writes are not allowed in read-only mode, they fail early */
+	if ((ts->flags & J_RDONLY) && direction == D_WRITE)
+		goto error;
+
+	if (count == 0)
+		goto error;
+
+	if ((long long) ts->len_w + count > MAX_TSIZE)
+		goto error;
+
+	op = malloc(sizeof(struct operation));
+	if (op == NULL)
+		goto error;
+
+	if (direction == D_WRITE) {
+		op->buf = malloc(count);
+		if (op->buf == NULL)
+			goto error;
+
+		ts->numops_w++;
+	} else {
+		ts->numops_r++;
 	}
 
-	/* find the last operation in the transaction and create a new one at
-	 * the end */
+	/* add op to the end of the linked list */
+	op->next = NULL;
 	if (ts->op == NULL) {
-		ts->op = malloc(sizeof(struct joper));
-		if (ts->op == NULL) {
-			pthread_mutex_unlock(&(ts->lock));
-			return -1;
-		}
-		jop = ts->op;
-		jop->prev = NULL;
+		ts->op = op;
+		op->prev = NULL;
 	} else {
 		for (tmpop = ts->op; tmpop->next != NULL; tmpop = tmpop->next)
 			;
-		tmpop->next = malloc(sizeof(struct joper));
-		if (tmpop->next == NULL) {
-			pthread_mutex_unlock(&(ts->lock));
-			return -1;
-		}
-		tmpop->next->prev = tmpop;
-		jop = tmpop->next;
+		tmpop->next = op;
+		op->prev = tmpop;
 	}
 
-	jop->buf = malloc(count);
-	if (jop->buf == NULL) {
-		/* remove from the list and fail */
-		if (jop->prev == NULL) {
-			ts->op = NULL;
-		} else {
-			jop->prev->next = jop->next;
+	pthread_mutex_unlock(&(ts->lock));
+
+	op->len = count;
+	op->offset = offset;
+	op->plen = 0;
+	op->pdata = NULL;
+	op->locked = 0;
+	op->direction = direction;
+
+	if (direction == D_WRITE) {
+		memcpy(op->buf, buf, count);
+
+		if (!(ts->flags & J_NOROLLBACK)) {
+			/* jtrans_commit() will want to read the current data,
+			 * so we tell the kernel about that */
+			posix_fadvise(ts->fs->fd, offset, count,
+					POSIX_FADV_WILLNEED);
 		}
-		free(jop);
-		pthread_mutex_unlock(&(ts->lock));
-		return -1;
+	} else {
+		/* this casts the const away, which is ugly but let us have a
+		 * common read/write path and avoid useless code repetition
+		 * just to handle it */
+		op->buf = (void *) buf;
+
+		/* if there are no overlapping writes, jtrans_commit() will
+		 * want to read the data from the disk; and if there are we
+		 * will already have submitted a request and one more won't
+		 * hurt */
+		posix_fadvise(ts->fs->fd, offset, count, POSIX_FADV_WILLNEED);
 	}
 
-	ts->numops++;
-	ts->len += count;
+	return 0;
+
+error:
 	pthread_mutex_unlock(&(ts->lock));
 
-	/* we copy the buffer because then the caller can reuse it */
-	memcpy(jop->buf, buf, count);
-	jop->len = count;
-	jop->offset = offset;
-	jop->next = NULL;
-	jop->plen = 0;
-	jop->pdata = NULL;
-	jop->locked = 0;
+	if (op && direction == D_WRITE)
+		free(op->buf);
+	free(op);
 
-	if (!(ts->flags & J_NOROLLBACK)) {
-		/* jtrans_commit() will want to read the current data, so we
-		 * tell the kernel about that */
-		posix_fadvise(ts->fs->fd, offset, count, POSIX_FADV_WILLNEED);
-	}
+	return -1;
+}
 
-	return 0;
+int jtrans_add_r(struct jtrans *ts, void *buf, size_t count, off_t offset)
+{
+	return jtrans_add_common(ts, buf, count, offset, D_READ);
 }
 
+int jtrans_add_w(struct jtrans *ts, const void *buf, size_t count,
+		off_t offset)
+{
+	return jtrans_add_common(ts, buf, count, offset, D_WRITE);
+}
+
+
 /* Commit a transaction */
 ssize_t jtrans_commit(struct jtrans *ts)
 {
-	ssize_t rv;
-	struct joper *op;
+	ssize_t r, retval = -1;
+	struct operation *op;
 	struct jlinger *linger;
-	jop_t *jop;
+	jop_t *jop = NULL;
 	size_t written = 0;
 
 	pthread_mutex_lock(&(ts->lock));
@@ -164,46 +251,84 @@ ssize_t jtrans_commit(struct jtrans *ts)
 	ts->flags = ts->flags & ~J_COMMITTED;
 	ts->flags = ts->flags & ~J_ROLLBACKED;
 
-	/* fail for read-only accesses */
-	if (ts->flags & J_RDONLY)
+	if (ts->numops_r + ts->numops_w == 0)
 		goto exit;
 
-	/* first of all lock all the regions we're going to work with;
-	 * otherwise there could be another transaction trying to write the
-	 * same spots and we could end up with interleaved writes, that could
-	 * break atomicity warantees if we need to rollback */
-	if (!(ts->flags & J_NOLOCK)) {
-		off_t lr;
-		for (op = ts->op; op != NULL; op = op->next) {
-			lr = plockf(ts->fs->fd, F_LOCKW, op->offset, op->len);
-			if (lr == -1)
-				/* note it can fail with EDEADLK */
-				goto unlock_exit;
-			op->locked = 1;
-		}
+	/* fail for read-only accesses if we have write operations */
+	if (ts->numops_w && (ts->flags & J_RDONLY))
+		goto exit;
+
+	/* create and fill the transaction file only if we have at least one
+	 * write operation */
+	if (ts->numops_w) {
+		jop = journal_new(ts->fs, ts->flags);
+		if (jop == NULL)
+			goto exit;
+	}
+
+	for (op = ts->op; op != NULL; op = op->next) {
+		if (op->direction == D_READ)
+			continue;
+
+		r = journal_add_op(jop, op->buf, op->len, op->offset);
+		if (r != 0)
+			goto unlink_exit;
+
+		fiu_exit_on("jio/commit/tf_opdata");
 	}
 
-	jop = journal_new(ts);
-	if (jop == NULL)
-		goto unlock_exit;
+	if (jop)
+		journal_pre_commit(jop);
+
+	fiu_exit_on("jio/commit/tf_data");
 
-	rv = journal_save(jop);
-	if (rv < 0)
+	/* lock all the regions we're going to work with; otherwise there
+	 * could be another transaction trying to write the same spots and we
+	 * could end up with interleaved writes, that could break atomicity
+	 * warantees if we need to rollback */
+	if (lock_file_ranges(ts, F_LOCKW) != 0)
 		goto unlink_exit;
 
+	if (!(ts->flags & J_NOROLLBACK)) {
+		for (op = ts->op; op != NULL; op = op->next) {
+			if (op->direction == D_READ)
+				continue;
+
+			 r = operation_read_prev(ts, op);
+			 if (r < 0)
+				 goto unlink_exit;
+		}
+	}
+
+	if (jop) {
+		r = journal_commit(jop);
+		if (r < 0)
+			goto unlink_exit;
+	}
+
 	/* now that we have a safe transaction file, let's apply it */
 	written = 0;
 	for (op = ts->op; op != NULL; op = op->next) {
-		rv = spwrite(ts->fs->fd, op->buf, op->len, op->offset);
-		if (rv != op->len)
+		if (op->direction == D_READ) {
+			r = spread(ts->fs->fd, op->buf, op->len, op->offset);
+			if (r != op->len)
+				goto rollback_exit;
+
+			continue;
+		}
+
+		/* from now on, write ops (which are more interesting) */
+
+		r = spwrite(ts->fs->fd, op->buf, op->len, op->offset);
+		if (r != op->len)
 			goto rollback_exit;
 
-		written += rv;
+		written += r;
 
 		if (have_sync_range && !(ts->flags & J_LINGER)) {
-			rv = sync_range_submit(ts->fs->fd, op->len,
+			r = sync_range_submit(ts->fs->fd, op->len,
 					op->offset);
-			if (rv != 0)
+			if (r != 0)
 				goto rollback_exit;
 		}
 
@@ -212,96 +337,111 @@ ssize_t jtrans_commit(struct jtrans *ts)
 
 	fiu_exit_on("jio/commit/wrote_all_ops");
 
-	if (ts->flags & J_LINGER) {
+	if (jop && (ts->flags & J_LINGER)) {
+		struct jlinger *lp;
+
 		linger = malloc(sizeof(struct jlinger));
 		if (linger == NULL)
 			goto rollback_exit;
 
 		linger->jop = jop;
+		linger->next = NULL;
 
 		pthread_mutex_lock(&(ts->fs->ltlock));
-		linger->next = ts->fs->ltrans;
-		ts->fs->ltrans = linger;
+
+		/* add it to the end of the list so they're in order */
+		if (ts->fs->ltrans == NULL) {
+			ts->fs->ltrans = linger;
+		} else {
+			lp = ts->fs->ltrans;
+			while (lp->next != NULL)
+				lp = lp->next;
+			lp->next = linger;
+		}
+
 		ts->fs->ltrans_len += written;
 		autosync_check(ts->fs);
+
 		pthread_mutex_unlock(&(ts->fs->ltlock));
-	} else {
+
+		/* Leave the journal_free() up to jsync() */
+		jop = NULL;
+	} else if (jop) {
 		if (have_sync_range) {
 			for (op = ts->op; op != NULL; op = op->next) {
-				rv = sync_range_wait(ts->fs->fd, op->len,
+				if (op->direction == D_READ)
+					continue;
+
+				r = sync_range_wait(ts->fs->fd, op->len,
 						op->offset);
-				if (rv != 0)
+				if (r != 0)
 					goto rollback_exit;
 			}
 		} else {
 			if (fdatasync(ts->fs->fd) != 0)
 				goto rollback_exit;
 		}
-
-		/* the transaction has been applied, so we cleanup and remove
-		 * it from the disk */
-		rv = journal_free(jop);
-		if (rv != 0)
-			goto rollback_exit;
 	}
 
-	jop = NULL;
-
-	/* mark the transaction as committed, _after_ it was removed */
+	/* mark the transaction as committed */
 	ts->flags = ts->flags | J_COMMITTED;
 
+	retval = 1;
 
 rollback_exit:
 	/* If the transaction failed we try to recover by rolling it back.
+	 * Only used if it has at least one write operation.
 	 *
 	 * NOTE: on extreme conditions (ENOSPC/disk failure) this can fail
 	 * too! There's nothing much we can do in that case, the caller should
 	 * take care of it by itself.
 	 *
-	 * The transaction file might be OK at this point, so the data could
-	 * be recovered by a posterior jfsck(); however, that's not what the
-	 * user expects (after all, if we return failure, new data should
-	 * never appear), so we remove the transaction file (see unlink_exit).
-	 *
 	 * Transactions that were successfuly recovered by rolling them back
-	 * will have J_ROLLBACKED in their flags */
-	if (!(ts->flags & J_COMMITTED) && !(ts->flags & J_ROLLBACKING)) {
-		rv = ts->flags;
+	 * will have J_ROLLBACKED in their flags. */
+	if (jop && !(ts->flags & J_COMMITTED) &&
+			!(ts->flags & J_ROLLBACKING)) {
+		r = ts->flags;
 		ts->flags = ts->flags | J_NOLOCK | J_ROLLBACKING;
 		if (jtrans_rollback(ts) >= 0) {
-			ts->flags = rv | J_ROLLBACKED;
+			ts->flags = r | J_ROLLBACKED;
+			retval = -1;
 		} else {
-			ts->flags = rv;
+			ts->flags = r;
+			retval = -2;
 		}
 	}
 
 unlink_exit:
-	if (jop)
-		journal_free(jop);
+	/* If the journal operation is no longer needed, we remove it from the
+	 * disk.
+	 *
+	 * Extreme conditions (filesystem just got read-only, for example) can
+	 * cause journal_free() to fail, but there's not much left to do at
+	 * that point, and the caller will have to be careful and stop its
+	 * operations. In that case, we will return -2, and the transaction
+	 * will be marked as J_COMMITTED to indicate that the data was
+	 * effectively written to disk. */
+	if (jop) {
+		/* Note we only unlink if we've written down the real data, or
+		 * at least rolled it back properly */
+		int data_is_safe = (ts->flags & J_COMMITTED) ||
+			(ts->flags & J_ROLLBACKED);
+		r = journal_free(jop, data_is_safe ? 1 : 0);
+		if (r != 0)
+			retval = -2;
+
+		jop = NULL;
+	}
 
-unlock_exit:
 	/* always unlock everything at the end; otherwise we could have
 	 * half-overlapping transactions applying simultaneously, and if
 	 * anything goes wrong it would be possible to break consistency */
-	if (!(ts->flags & J_NOLOCK)) {
-		for (op = ts->op; op != NULL; op = op->next) {
-			if (op->locked) {
-				plockf(ts->fs->fd, F_UNLOCK,
-						op->offset, op->len);
-			}
-		}
-	}
+	lock_file_ranges(ts, F_UNLOCK);
 
 exit:
 	pthread_mutex_unlock(&(ts->lock));
 
-	/* return the length only if it was properly committed */
-	if (ts->flags & J_COMMITTED)
-		return written;
-	else if (ts->flags & J_ROLLBACKED)
-		return -1;
-	else
-		return -2;
+	return retval;
 }
 
 /* Rollback a transaction */
@@ -309,11 +449,16 @@ ssize_t jtrans_rollback(struct jtrans *ts)
 {
 	ssize_t rv;
 	struct jtrans *newts;
-	struct joper *op, *curop, *lop;
+	struct operation *op, *curop, *lop;
+
+	newts = jtrans_new(ts->fs, 0);
+	if (newts == NULL)
+		return -1;
 
-	newts = jtrans_new(ts->fs);
 	newts->flags = ts->flags;
-	newts->numops = ts->numops;
+	newts->numops_r = 0;
+	newts->numops_w = 0;
+	newts->len_w = 0;
 
 	if (ts->op == NULL || ts->flags & J_NOROLLBACK) {
 		rv = -1;
@@ -324,8 +469,11 @@ ssize_t jtrans_rollback(struct jtrans *ts)
 	for (op = ts->op; op->next != NULL; op = op->next)
 		;
 
-	/* and traverse the list backwards */
+	/* and traverse the list backwards, skipping read operations */
 	for ( ; op != NULL; op = op->prev) {
+		if (op->direction == D_READ)
+			continue;
+
 		/* if we extended the data in the previous transaction, we
 		 * should truncate it back */
 		/* DANGEROUS: this is one of the main reasons why rollbacking
@@ -340,7 +488,7 @@ ssize_t jtrans_rollback(struct jtrans *ts)
 		}
 
 		/* manually add the operation to the new transaction */
-		curop = malloc(sizeof(struct joper));
+		curop = malloc(sizeof(struct operation));
 		if (curop == NULL) {
 			rv = -1;
 			goto exit;
@@ -351,8 +499,12 @@ ssize_t jtrans_rollback(struct jtrans *ts)
 		curop->buf = op->pdata;
 		curop->plen = op->plen;
 		curop->pdata = op->pdata;
+		curop->direction = op->direction;
 		curop->locked = 0;
 
+		newts->numops_w++;
+		newts->len_w += curop->len;
+
 		/* add the new transaction to the list */
 		if (newts->op == NULL) {
 			newts->op = curop;
@@ -386,7 +538,7 @@ exit:
  */
 
 /* Open a file */
-struct jfs *jopen(const char *name, int flags, int mode, int jflags)
+struct jfs *jopen(const char *name, int flags, int mode, unsigned int jflags)
 {
 	int jfd, rv;
 	unsigned int t;
@@ -471,11 +623,13 @@ struct jfs *jopen(const char *name, int flags, int mode, int jflags)
 	if (fs->jdirfd < 0)
 		goto error_exit;
 
-	snprintf(jlockfile, PATH_MAX, "%s/%s", jdir, "lock");
+	snprintf(jlockfile, PATH_MAX, "%s/lock", jdir);
 	jfd = open(jlockfile, O_RDWR | O_CREAT, 0600);
 	if (jfd < 0)
 		goto error_exit;
 
+	fs->jfd = jfd;
+
 	/* initialize the lock file by writing the first tid to it, but only
 	 * if its empty, otherwise there is a race if two processes call
 	 * jopen() simultaneously and both initialize the file */
@@ -485,14 +639,11 @@ struct jfs *jopen(const char *name, int flags, int mode, int jflags)
 		t = 0;
 		rv = spwrite(jfd, &t, sizeof(t), 0);
 		if (rv != sizeof(t)) {
-			plockf(jfd, F_UNLOCK, 0, 0);
 			goto error_exit;
 		}
 	}
 	plockf(jfd, F_UNLOCK, 0, 0);
 
-	fs->jfd = jfd;
-
 	fs->jmap = (unsigned int *) mmap(NULL, sizeof(unsigned int),
 			PROT_READ | PROT_WRITE, MAP_SHARED, jfd, 0);
 	if (fs->jmap == MAP_FAILED)
@@ -521,10 +672,16 @@ int jsync(struct jfs *fs)
 	if (rv != 0)
 		return rv;
 
+	/* note the jops will be in order, so if we crash or fail in the
+	 * middle of this, there will be no problem applying the remaining
+	 * transactions */
 	pthread_mutex_lock(&(fs->ltlock));
 	while (fs->ltrans != NULL) {
 		fiu_exit_on("jio/jsync/pre_unlink");
-		journal_free(fs->ltrans->jop);
+		if (journal_free(fs->ltrans->jop, 1) != 0) {
+			pthread_mutex_unlock(&(fs->ltlock));
+			return -1;
+		}
 
 		ltmp = fs->ltrans->next;
 		free(fs->ltrans);
@@ -540,18 +697,19 @@ int jsync(struct jfs *fs)
 int jmove_journal(struct jfs *fs, const char *newpath)
 {
 	int ret;
-	char *oldpath, jlockfile[PATH_MAX];
+	char *oldpath, jlockfile[PATH_MAX], oldjlockfile[PATH_MAX];
 
 	/* we try to be sure that all lingering transactions have been
 	 * applied, so when we try to remove the journal directory, only the
-	 * lockfile is there; however, we do this just to be nice, but the
-	 * caller must be sure there are no in-flight transactions or any
-	 * other kind of operation around when he calls this function */
+	 * lockfile is there; however, we do this just to be nice, the caller
+	 * must be sure there are no in-flight transactions or any other kind
+	 * of operation around when he calls this function */
 	jsync(fs);
 
 	oldpath = fs->jdir;
+	snprintf(oldjlockfile, PATH_MAX, "%s/lock", fs->jdir);
 
-	fs->jdir = (char *) malloc(strlen(newpath + 1));
+	fs->jdir = (char *) malloc(strlen(newpath) + 1);
 	if (fs->jdir == NULL)
 		return -1;
 	strcpy(fs->jdir, newpath);
@@ -563,30 +721,20 @@ int jmove_journal(struct jfs *fs, const char *newpath)
 
 		close(fs->jdirfd);
 		fs->jdirfd = open(newpath, O_RDONLY);
-		if (fs->jdirfd < 0) {
-			ret = -1;
+		if (fs->jdirfd < 0)
 			goto exit;
-		}
 
-		close(fs->jfd);
-		snprintf(jlockfile, PATH_MAX, "%s/%s", newpath, "lock");
-		fs->jfd = open(jlockfile, O_RDWR | O_CREAT, 0600);
-		if (fs->jfd < 0)
-			goto exit;
-
-		munmap(fs->jmap, sizeof(unsigned int));
-		fs->jmap = (unsigned int *) mmap(NULL, sizeof(unsigned int),
-			PROT_READ | PROT_WRITE, MAP_SHARED, fs->jfd, 0);
-		if (fs->jmap == MAP_FAILED)
+		snprintf(jlockfile, PATH_MAX, "%s/lock", newpath);
+		ret = rename(oldjlockfile, jlockfile);
+		if (ret < 0)
 			goto exit;
 
 		/* remove the journal directory, if possible */
-		snprintf(jlockfile, PATH_MAX, "%s/%s", oldpath, "lock");
-		unlink(jlockfile);
+		unlink(oldjlockfile);
 		ret = rmdir(oldpath);
 		if (ret == -1) {
 			/* we couldn't remove it, something went wrong
-			 * (possible it had some files left) */
+			 * (possibly it had some files left) */
 			goto exit;
 		}
 
@@ -626,6 +774,7 @@ int jclose(struct jfs *fs)
 		free(fs->name);
 	if (fs->jdir)
 		free(fs->jdir);
+
 	pthread_mutex_destroy(&(fs->lock));
 
 	free(fs);
diff --git a/libjio/trans.h b/libjio/trans.h
index 366585e..466c2d9 100644
--- a/libjio/trans.h
+++ b/libjio/trans.h
@@ -2,8 +2,7 @@
 #ifndef _TRANS_H
 #define _TRANS_H
 
-
-struct joper;
+struct operation;
 
 /** A transaction */
 struct jtrans {
@@ -16,59 +15,64 @@ struct jtrans {
 	/** Transaction flags */
 	uint32_t flags;
 
-	/** Number of operations in the list */
-	unsigned int numops;
+	/** Number of read operations in the list */
+	unsigned int numops_r;
 
-	/** Transaction's length */
-	size_t len;
+	/** Number of write operations in the list */
+	unsigned int numops_w;
+
+	/** Sum of the lengths of the write operations */
+	size_t len_w;
 
 	/** Lock that protects the list of operations */
 	pthread_mutex_t lock;
 
 	/** List of operations */
-	struct joper *op;
+	struct operation *op;
 };
 
-/* a single operation */
-struct joper {
-	int locked;		/* is the region is locked? */
-	off_t offset;		/* operation's offset */
-	size_t len;		/* data length */
-	void *buf;		/* data */
-	size_t plen;		/* previous data length */
-	void *pdata;		/* previous data */
-	struct joper *prev;
-	struct joper *next;
+/** Possible operation directions */
+enum op_direction {
+	D_READ = 1,
+	D_WRITE = 2,
 };
 
-/* lingered transaction */
-struct journal_op;
-struct jlinger {
-	struct journal_op *jop;
-	struct jlinger *next;
-};
+/** A single operation */
+struct operation {
+	/** Is the region locked? */
+	int locked;
 
+	/** Operation's offset */
+	off_t offset;
 
-/* on-disk structures */
+	/** Data length, in bytes */
+	size_t len;
 
-/* header (fixed length, defined below) */
-struct disk_header {
-	uint32_t id;		/* id */
-	uint32_t flags;		/* flags about this transaction */
-	uint32_t numops;	/* number of operations */
-};
+	/** Data buffer */
+	void *buf;
 
-/* operation */
-struct disk_operation {
-	uint32_t len;		/* data length */
-	uint32_t plen;		/* previous data length */
-	uint64_t offset;	/* offset relative to the BOF */
-	char *prevdata;		/* previous data for rollback */
+	/** Direction */
+	enum op_direction direction;
+
+	/** Previous data length (only if direction == D_WRITE) */
+	size_t plen;
+
+	/** Previous data (only if direction == D_WRITE) */
+	void *pdata;
+
+	/** Previous operation */
+	struct operation *prev;
+
+	/** Next operation */
+	struct operation *next;
 };
 
-/* disk constants */
-#define J_DISKHEADSIZE	 12	/* length of disk_header */
-#define J_DISKOPHEADSIZE 16	/* length of disk_operation header */
+/* lingered transaction */
+struct journal_op;
+struct jlinger {
+	struct journal_op *jop;
+	struct jlinger *next;
+};
 
 
 #endif
diff --git a/libjio/unix.c b/libjio/unix.c
index 1a67847..076dbb7 100644
--- a/libjio/unix.c
+++ b/libjio/unix.c
@@ -21,7 +21,7 @@
 /* read() wrapper */
 ssize_t jread(struct jfs *fs, void *buf, size_t count)
 {
-	int rv;
+	ssize_t rv;
 	off_t pos;
 
 	pthread_mutex_lock(&(fs->lock));
@@ -32,10 +32,8 @@ ssize_t jread(struct jfs *fs, void *buf, size_t count)
 	rv = spread(fs->fd, buf, count, pos);
 	plockf(fs->fd, F_UNLOCK, pos, count);
 
-	if (rv > 0) {
-		/* if success, advance the file pointer */
+	if (rv > 0)
 		lseek(fs->fd, rv, SEEK_CUR);
-	}
 
 	pthread_mutex_unlock(&(fs->lock));
 
@@ -45,7 +43,7 @@ ssize_t jread(struct jfs *fs, void *buf, size_t count)
 /* pread() wrapper */
 ssize_t jpread(struct jfs *fs, void *buf, size_t count, off_t offset)
 {
-	int rv;
+	ssize_t rv;
 
 	plockf(fs->fd, F_LOCKR, offset, count);
 	rv = spread(fs->fd, buf, count, offset);
@@ -57,19 +55,18 @@ ssize_t jpread(struct jfs *fs, void *buf, size_t count, off_t offset)
 /* readv() wrapper */
 ssize_t jreadv(struct jfs *fs, const struct iovec *vector, int count)
 {
-	int rv, i;
-	size_t sum;
+	ssize_t rv;
 	off_t pos;
 
-	sum = 0;
-	for (i = 0; i < count; i++)
-		sum += vector[i].iov_len;
-
 	pthread_mutex_lock(&(fs->lock));
 	pos = lseek(fs->fd, 0, SEEK_CUR);
+	rv = -1;		/* result reported if lseek() failed */
+	if (pos < 0)
+		goto exit;	/* must not return with fs->lock held */
 	plockf(fs->fd, F_LOCKR, pos, count);
 	rv = readv(fs->fd, vector, count);
 	plockf(fs->fd, F_UNLOCK, pos, count);
+exit:
 	pthread_mutex_unlock(&(fs->lock));
 
 	return rv;
@@ -83,11 +80,11 @@ ssize_t jreadv(struct jfs *fs, const struct iovec *vector, int count)
 /* write() wrapper */
 ssize_t jwrite(struct jfs *fs, const void *buf, size_t count)
 {
-	int rv;
+	ssize_t rv;
 	off_t pos;
 	struct jtrans *ts;
 
-	ts = jtrans_new(fs);
+	ts = jtrans_new(fs, 0);
 	if (ts == NULL)
 		return -1;
 
@@ -98,16 +95,14 @@ ssize_t jwrite(struct jfs *fs, const void *buf, size_t count)
 	else
 		pos = lseek(fs->fd, 0, SEEK_CUR);
 
-	rv = jtrans_add(ts, buf, count, pos);
+	rv = jtrans_add_w(ts, buf, count, pos);
 	if (rv < 0)
 		goto exit;
 
 	rv = jtrans_commit(ts);
 
-	if (rv > 0) {
-		/* if success, advance the file pointer */
-		lseek(fs->fd, rv, SEEK_CUR);
-	}
+	if (rv >= 0)
+		lseek(fs->fd, count, SEEK_CUR);
 
 exit:
 
@@ -115,20 +110,20 @@ exit:
 
 	jtrans_free(ts);
 
-	return rv;
+	return (rv >= 0) ? count : rv;
 }
 
 /* pwrite() wrapper */
 ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset)
 {
-	int rv;
+	ssize_t rv;
 	struct jtrans *ts;
 
-	ts = jtrans_new(fs);
+	ts = jtrans_new(fs, 0);
 	if (ts == NULL)
 		return -1;
 
-	rv = jtrans_add(ts, buf, count, offset);
+	rv = jtrans_add_w(ts, buf, count, offset);
 	if (rv < 0)
 		goto exit;
 
@@ -137,18 +132,19 @@ ssize_t jpwrite(struct jfs *fs, const void *buf, size_t count, off_t offset)
 exit:
 	jtrans_free(ts);
 
-	return rv;
+	return (rv >= 0) ? count : rv;
 }
 
 /* writev() wrapper */
 ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count)
 {
-	int rv, i;
+	int i;
 	size_t sum;
+	ssize_t rv;
 	off_t ipos, t;
 	struct jtrans *ts;
 
-	ts = jtrans_new(fs);
+	ts = jtrans_new(fs, 0);
 	if (ts == NULL)
 		return -1;
 
@@ -163,7 +159,8 @@ ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count)
 
 	sum = 0;
 	for (i = 0; i < count; i++) {
-		rv = jtrans_add(ts, vector[i].iov_base, vector[i].iov_len, t);
+		rv = jtrans_add_w(ts, vector[i].iov_base,
+				vector[i].iov_len, t);
 		if (rv < 0)
 			goto exit;
 
@@ -173,18 +170,15 @@ ssize_t jwritev(struct jfs *fs, const struct iovec *vector, int count)
 
 	rv = jtrans_commit(ts);
 
-	if (rv > 0) {
-		/* if success, advance the file pointer */
-		lseek(fs->fd, rv, SEEK_CUR);
-	}
+	if (rv >= 0)
+		lseek(fs->fd, sum, SEEK_CUR);
 
 exit:
 	pthread_mutex_unlock(&(fs->lock));
 
 	jtrans_free(ts);
 
-	return rv;
-
+	return (rv >= 0) ? sum : rv;
 }
 
 /* Truncate a file. Be careful with this */
diff --git a/samples/full.c b/samples/full.c
index dd621b3..5da9af8 100644
--- a/samples/full.c
+++ b/samples/full.c
@@ -27,9 +27,9 @@ int main(void)
 	}
 
 	/* write two "Hello world"s next to each other */
-	trans = jtrans_new(file);
-	jtrans_add(trans, TEXT, strlen(TEXT), 0);
-	jtrans_add(trans, TEXT, strlen(TEXT), strlen(TEXT));
+	trans = jtrans_new(file, 0);
+	jtrans_add_w(trans, TEXT, strlen(TEXT), 0);
+	jtrans_add_w(trans, TEXT, strlen(TEXT), strlen(TEXT));
 	r = jtrans_commit(trans);
 	if (r < 0) {
 		perror("jtrans_commit");
diff --git a/samples/jio3.c b/samples/jio3.c
index c71c727..b5a6bd3 100644
--- a/samples/jio3.c
+++ b/samples/jio3.c
@@ -19,21 +19,21 @@ int main(int argc, char **argv)
 	if (fs == NULL)
 		perror("jopen()");
 
-	ts = jtrans_new(fs);
+	ts = jtrans_new(fs, 0);
 	if (ts == NULL)
 		perror("jtrans_new()");
 
 #define str1 "1ROLLBACKTEST1!\n"
-	jtrans_add(ts, str1, strlen(str1), 0);
+	jtrans_add_w(ts, str1, strlen(str1), 0);
 
 #define str2 "2ROLLBACKTEST2!\n"
-	jtrans_add(ts, str2, strlen(str2), strlen(str1));
+	jtrans_add_w(ts, str2, strlen(str2), strlen(str1));
 
 #define str3 "3ROLLBACKTEST3!\n"
-	jtrans_add(ts, str3, strlen(str3), strlen(str1) + strlen(str2));
+	jtrans_add_w(ts, str3, strlen(str3), strlen(str1) + strlen(str2));
 
 	rv = jtrans_commit(ts);
-	if (rv != strlen(str1) + strlen(str2) + strlen(str3))
+	if (rv < 0)
 		perror("jtrans_commit()");
 	printf("commit ok: %d\n", rv);
 
diff --git a/tests/behaviour/runtests b/tests/behaviour/runtests
index 2860493..a078083 100755
--- a/tests/behaviour/runtests
+++ b/tests/behaviour/runtests
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import sys
+import resource
 import tf
 
 possible_tests = ('normal', 'corruption', 'fi')
@@ -18,6 +19,11 @@ specific_test = None
 if len(sys.argv) >= 3:
 	specific_test = sys.argv[2]
 
+M = 1024 * 1024
+resource.setrlimit(resource.RLIMIT_CPU, (120, 120))
+resource.setrlimit(resource.RLIMIT_FSIZE, (100 * M, 100 * M))
+resource.setrlimit(resource.RLIMIT_AS, (500 * M, 500 * M))
+
 for mn in mnames:
 	print '--', mn
 	tf.autorun(__import__(mn), specific_test)
diff --git a/tests/behaviour/t_corruption.py b/tests/behaviour/t_corruption.py
index 35a1998..0a9d270 100644
--- a/tests/behaviour/t_corruption.py
+++ b/tests/behaviour/t_corruption.py
@@ -27,8 +27,11 @@ def test_c01():
 	n = run_with_tmp(f1)
 	assert content(n) == ''
 	tc = open(transpath(n, 1)).read()
-	# flip just one bit of the first byte
-	tc = chr((ord(tc[0]) & 0xFE) | (~ ord(tc[0]) & 0x1) & 0xFF) + tc[1:]
+	# flip just one bit in the transaction data
+	pos = DHS + DOHS + len(c) / 2
+	tc = tc[:pos] + \
+		chr((ord(tc[pos]) & 0xFE) | (~ ord(tc[pos]) & 0x1) & 0xFF) + \
+		tc[pos + 1:]
 	open(transpath(n, 1), 'w').write(tc)
 	fsck_verify(n, corrupt = 1)
 	assert content(n) == ''
@@ -86,4 +89,89 @@ def test_c04():
 	assert content(n) == ''
 	cleanup(n)
 
+def test_c05():
+	"truncate trans (tiny)"
+	c = gencontent()
+
+	def f1(f, jf):
+		fiu.enable("jio/commit/tf_sync")
+		jf.write(c)
+
+	n = run_with_tmp(f1)
+	assert content(n) == ''
+	tp = transpath(n, 1)
+	open(tp, 'r+').truncate(2)
+	fsck_verify(n, broken = 1)
+	assert content(n) == ''
+	cleanup(n)
+
+def test_c06():
+	"header version != 1"
+	c = gencontent()
+
+	def f1(f, jf):
+		fiu.enable("jio/commit/tf_sync")
+		jf.write(c)
+
+	n = run_with_tmp(f1)
+	assert content(n) == ''
+
+	# there is no need to recalculate the checksum because it is verified
+	# after the version check
+	tf = TransFile(transpath(n, 1))
+	tf.ver = 8
+	tf.save()
+	fsck_verify(n, broken = 1)
+	assert content(n) == ''
+	cleanup(n)
+
+def test_c07():
+	"trailer numops mismatch"
+	c = gencontent()
+
+	def f1(f, jf):
+		fiu.enable("jio/commit/tf_sync")
+		jf.write(c)
+
+	n = run_with_tmp(f1)
+	assert content(n) == ''
+
+	# there is no need to recalculate the checksum because it is verified
+	# after the numops check
+	tf = TransFile(transpath(n, 1))
+	tf.numops = 55
+	tf.save()
+	fsck_verify(n, broken = 1)
+	assert content(n) == ''
+	cleanup(n)
+
+def test_c08():
+	"broken journal"
+	c = gencontent()
+
+	f, jf = bitmp(jflags = 0)
+	n = f.name
+
+	def f1(f, jf):
+		fiu.enable("jio/commit/tf_sync")
+		jf.write(c)
+
+	run_forked(f1, f, jf)
+
+	assert content(n) == ''
+	open(jiodir(n) + '/broken', 'w+')
+
+	def f2(f, jf):
+		try:
+			jf.pwrite(c, 200)
+		except IOError:
+			return
+		raise RuntimeError
+
+	run_forked(f2, f, jf)
+
+	fsck_verify(n, reapplied = 1)
+	assert content(n) == c
+	assert not os.path.exists(jiodir(n) + '/broken')
+	cleanup(n)
 
diff --git a/tests/behaviour/t_fi.py b/tests/behaviour/t_fi.py
index 7490a08..419030d 100644
--- a/tests/behaviour/t_fi.py
+++ b/tests/behaviour/t_fi.py
@@ -65,20 +65,20 @@ def test_f03():
 	cleanup(n)
 
 def test_f04():
-	"fail jio/commit/tf_ophdr"
+	"fail jio/commit/tf_pre_addop"
 	c = gencontent()
 
 	def f1(f, jf):
-		fiu.enable_external("jio/commit/tf_ophdr",
-				gen_ret_after(1, 0, 1))
+		fiu.enable_external("jio/commit/tf_pre_addop",
+				gen_ret_seq((0, 1)))
 		t = jf.new_trans()
-		t.add(c, 0)
-		t.add(c, len(c) + 200)
+		t.add_w(c, 0)
+		t.add_w(c, len(c) + 200)
 		t.commit()
 
 	n = run_with_tmp(f1)
 
-	assert len(content(transpath(n, 1))) == DHS + DOHS + len(c) + DOHS
+	assert len(content(transpath(n, 1))) == DHS + DOHS + len(c)
 	assert content(n) == ''
 	fsck_verify(n, broken = 1)
 	assert content(n) == ''
@@ -90,10 +90,10 @@ def test_f05():
 
 	def f1(f, jf):
 		fiu.enable_external("jio/commit/tf_opdata",
-				gen_ret_after(1, 0, 1))
+				gen_ret_seq((0, 1)))
 		t = jf.new_trans()
-		t.add(c, 0)
-		t.add(c, len(c) + 200)
+		t.add_w(c, 0)
+		t.add_w(c, len(c) + 200)
 		t.commit()
 
 	n = run_with_tmp(f1)
@@ -111,8 +111,8 @@ def test_f06():
 	def f1(f, jf):
 		fiu.enable("jio/commit/tf_data")
 		t = jf.new_trans()
-		t.add(c, 0)
-		t.add(c, len(c) + 200)
+		t.add_w(c, 0)
+		t.add_w(c, len(c) + 200)
 		t.commit()
 
 	n = run_with_tmp(f1)
@@ -144,8 +144,8 @@ def test_f08():
 	def f1(f, jf):
 		fiu.enable("jio/commit/wrote_op")
 		t = jf.new_trans()
-		t.add(c, 0)
-		t.add(c, len(c) + 200)
+		t.add_w(c, 0)
+		t.add_w(c, len(c) + 200)
 		t.commit()
 
 	n = run_with_tmp(f1)
@@ -192,7 +192,7 @@ def test_f11():
 	def f1(f, jf):
 		jf.write('x' * (80 + len(c)))
 		t = jf.new_trans()
-		t.add(c, 80)
+		t.add_w(c, 80)
 		t.commit()
 		assert content(f.name) == 'x' * 80 + c
 		fiu.enable("jio/commit/tf_sync")
@@ -212,7 +212,7 @@ def test_f12():
 	def f1(f, jf):
 		fiu.enable("jio/jsync/pre_unlink")
 		t = jf.new_trans()
-		t.add(c, 0)
+		t.add_w(c, 0)
 		t.commit()
 		jf.jsync()
 
diff --git a/tests/behaviour/t_normal.py b/tests/behaviour/t_normal.py
index 520d3a4..a9ba8df 100644
--- a/tests/behaviour/t_normal.py
+++ b/tests/behaviour/t_normal.py
@@ -22,7 +22,8 @@ def test_n02():
 	c = gencontent()
 
 	def f1(f, jf):
-		jf.write(c)
+		jf.write(c[:len(c) / 2])
+		jf.write(c[len(c) / 2:])
 		jf.lseek(0, 0)
 		assert jf.read(len(c) * 2) == c
 
@@ -59,7 +60,7 @@ def test_n05():
 
 	def f1(f, jf):
 		t = jf.new_trans()
-		t.add(c, 80)
+		t.add_w(c, 80)
 		t.commit()
 
 	n = run_with_tmp(f1)
@@ -73,7 +74,7 @@ def test_n06():
 
 	def f1(f, jf):
 		t = jf.new_trans()
-		t.add(c, 80)
+		t.add_w(c, 80)
 		t.commit()
 		t.rollback()
 
@@ -94,7 +95,7 @@ def test_n07():
 	def f1(f, jf):
 		jf.write(c1)
 		t = jf.new_trans()
-		t.add(c2, len(c1) - 973)
+		t.add_w(c2, len(c1) - 973)
 		t.commit()
 		t.rollback()
 
@@ -115,10 +116,10 @@ def test_n08():
 	def f1(f, jf):
 		jf.write(c1)
 		t = jf.new_trans()
-		t.add(c2, len(c1) - 973)
-		t.add(c3, len(c1) - 1041)
-		t.add(c4, len(c1) - 666)
-		t.add(c5, len(c1) - 3000)
+		t.add_w(c2, len(c1) - 973)
+		t.add_w(c3, len(c1) - 1041)
+		t.add_w(c4, len(c1) - 666)
+		t.add_w(c5, len(c1) - 3000)
 		t.commit()
 
 	n = run_with_tmp(f1)
@@ -137,10 +138,10 @@ def test_n09():
 	def f1(f, jf):
 		jf.write(c1)
 		t = jf.new_trans()
-		t.add(c2, len(c1) - 973)
-		t.add(c3, len(c1) - 1041)
-		t.add(c4, len(c1) - 666)
-		t.add(c5, len(c1) - 3000)
+		t.add_w(c2, len(c1) - 973)
+		t.add_w(c3, len(c1) - 1041)
+		t.add_w(c4, len(c1) - 666)
+		t.add_w(c5, len(c1) - 3000)
 		t.commit()
 		t.rollback()
 
@@ -156,7 +157,7 @@ def test_n10():
 
 	def f1(f, jf):
 		t = jf.new_trans()
-		t.add(c, 0)
+		t.add_w(c, 0)
 		t.commit()
 		del t
 		assert content(f.name) == c
@@ -170,4 +171,226 @@ def test_n10():
 	fsck_verify(n)
 	cleanup(n)
 
+def test_n11():
+	"jfsck a nonexisting file"
+	try:
+		libjio.jfsck('this file does not exist')
+	except IOError:
+		return
+	raise AssertionError	# jfsck() was expected to fail with IOError
+
+def test_n12():
+	"jfsck with a nonexisting dir"
+	f, jf = bitmp()
+	try:
+		libjio.jfsck(f.name, 'this directory does not exist')
+	except IOError:
+		cleanup(f.name)
+		return
+	raise AssertionError	# jfsck() was expected to fail with IOError
+
+def test_n13():
+	"move journal to a nonexisting dir"
+	import os
+
+	f, jf = bitmp()
+	n = f.name
+	p = tmppath()
+
+	jf.write('x')
+	jf.jmove_journal(p)
+	jf.write('y')
+	del jf
+
+	assert libjio.jfsck(n, p)['total'] == 0
+	os.unlink(n)
+
+def test_n14():
+	"autosync"
+	f, jf = bitmp(jflags = libjio.J_LINGER)
+	n = f.name
+
+	jf.autosync_start(1, 10)
+	jf.write('x' * 200)
+	jf.write('x' * 200)
+	jf.autosync_stop()
+	del jf
+
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n15():
+	"jpread/jpwrite"
+	c = gencontent()
+
+	f, jf = bitmp(jflags = libjio.J_LINGER)
+	n = f.name
+
+	jf.pwrite(c, 2000)
+	assert content(n) == '\0' * 2000 + c
+	assert jf.pread(len(c), 2000) == c
+	del jf
+
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n16():
+	"jopen r/o + jtrans_add_w + jtrans_commit"
+	c = gencontent()
+
+	# create the file before opening, read-only mode does not create it
+	n = tmppath()
+	open(n, 'w+')
+	f, jf = biopen(n, mode = 'r')
+
+	t = jf.new_trans()
+
+	try:
+		t.add_w(c, 80)
+	except IOError:
+		pass
+	else:
+		raise AssertionError
+
+	try:
+		# note this fails because there are no ops to commit
+		t.commit()
+	except IOError:
+		pass
+	else:
+		raise AssertionError
+
+	cleanup(n)
+
+def test_n17():
+	"move journal to an existing dir"
+	import os
+
+	f, jf = bitmp()
+	n = f.name
+	p = tmppath()
+	os.mkdir(p)
+	open(p + '/x', 'w')
+
+	jf.write('x')
+	jf.jmove_journal(p)
+	jf.write('y')
+	del jf
+	os.unlink(p + '/x')
+
+	assert libjio.jfsck(n, p)['total'] == 0
+	os.unlink(n)
+
+def test_n18():
+	"jtrans_rollback with norollback"
+	c = gencontent()
+	f, jf = bitmp(jflags = libjio.J_NOROLLBACK)
+	n = f.name
+
+	t = jf.new_trans()
+	t.add_w(c, 80)
+	t.commit()
+	try:
+		t.rollback()
+	except IOError:
+		pass
+	else:
+		raise AssertionError
+
+	assert content(n) == '\0' * 80 + c
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n19():
+	"jwrite in files opened with O_APPEND"
+	c1 = gencontent()
+	c2 = gencontent()
+	f, jf = bitmp(mode = 'a')
+	n = f.name
+
+	jf.write(c1)
+	jf.write(c2)
+
+	assert content(n) == c1 + c2
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n20():
+	"jtrans_add_w of 0 length"
+	f, jf = bitmp()
+	n = f.name
+
+	t = jf.new_trans()
+
+	try:
+		t.add_w('', 80)
+	except IOError:
+		pass
+	else:
+		raise AssertionError
+
+	del t
+	del jf
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n21():
+	"jwritev and jreadv"
+	f, jf = bitmp()
+	n = f.name
+
+	jf.writev(["hello ", "world"])
+	l = [bytearray("......"), bytearray(".....")]
+	jf.lseek(0, 0)
+	jf.readv(l)
+
+	assert content(n) == "hello world"
+	assert l[0] == "hello " and l[1] == "world"
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n22():
+	"jpread/jpwrite ~2mb"
+	c = gencontent(2 * 1024 * 1024 + 1465)
+
+	f, jf = bitmp(jflags = libjio.J_LINGER)
+	n = f.name
+
+	jf.pwrite(c, 2000)
+	assert content(n) == '\0' * 2000 + c
+	assert jf.pread(len(c), 2000) == c
+	del jf
+
+	fsck_verify(n)
+	cleanup(n)
+
+def test_n23():
+	"jtrans_add_w + jtrans_add_r"
+	f, jf = bitmp()
+	n = f.name
+
+	c1 = gencontent(1000)
+	c2 = gencontent(1000)
+	c3 = gencontent(1000)
+
+	buf1 = bytearray(0 for i in range(30))
+	buf2 = bytearray(0 for i in range(100))
+
+	t = jf.new_trans()
+	t.add_w(c1, 0)
+	t.add_r(buf1, 0)
+	t.add_w(c2, len(c2))
+	t.add_r(buf2, len(c1) - len(buf2) / 2)
+	t.add_w(c3, len(c1) + len(c2))
+	t.commit()
+
+	assert content(n) == c1 + c2 + c3
+	assert buf1 == c1[:len(buf1)]
+	assert buf2 == c1[-(len(buf2) / 2):] + c2[:len(buf2) / 2]
+
+	del t
+	del jf
+	fsck_verify(n)
+	cleanup(n)
+
 
diff --git a/tests/behaviour/tf.py b/tests/behaviour/tf.py
index 786f24c..2f3f55c 100644
--- a/tests/behaviour/tf.py
+++ b/tests/behaviour/tf.py
@@ -17,9 +17,10 @@ import struct
 import libjio
 
 
-# Useful constants, must match libjio.h
-DHS = 12	# disk header size
-DOHS = 16	# disk op header size
+# Useful constants, must match journal.h
+DHS = 8		# disk header size
+DOHS = 12	# disk op header size
+DTS = 8		# disk trailer size
 
 
 def tmppath():
@@ -37,6 +38,7 @@ def tmppath():
 
 def run_forked(f, *args, **kwargs):
 	"""Runs the function in a different process."""
+	sys.stdout.flush()
 	pid = os.fork()
 	if pid == 0:
 		# child
@@ -80,6 +82,8 @@ def biopen(path, mode = 'w+', jflags = 0):
 		flags = os.O_RDWR
 		if '+' in mode:
 			flags = flags | os.O_CREAT | os.O_TRUNC
+	elif 'a' in mode:
+		flags = os.O_RDWR | os.O_APPEND
 	else:
 		raise RuntimeError
 
@@ -107,9 +111,9 @@ def transpath(path, ntrans):
 	jpath = jiodir(path)
 	return jpath + '/' + str(ntrans)
 
-def fsck(path):
+def fsck(path, flags = 0):
 	"Calls libjio's jfsck()."
-	res = libjio.jfsck(path)
+	res = libjio.jfsck(path, flags = flags)
 	return res
 
 def fsck_verify(n, **kwargs):
@@ -123,11 +127,10 @@ def fsck_verify(n, **kwargs):
 		'reapplied': 0,
 		'corrupt': 0,
 		'in_progress': 0,
-		'apply_error': 0,
 	}
 	expected.update(kwargs)
 	expected['total'] = sum(expected.values())
-	res = fsck(n)
+	res = fsck(n, flags = libjio.J_CLEANUP)
 
 	for k in expected:
 		if k not in res:
@@ -159,9 +162,11 @@ class attrdict (dict):
 
 class TransFile (object):
 	def __init__(self, path = ''):
+		self.ver = 1
 		self.id = -1
 		self.flags = 0
 		self.numops = -1
+		self.checksum = -1
 		self.ops = []
 		self.path = path
 		if path:
@@ -171,30 +176,39 @@ class TransFile (object):
 		fd = open(self.path)
 
 		# header
-		hdrfmt = "III"
-		self.id, self.flags, self.numops = struct.unpack(hdrfmt,
+		hdrfmt = "!HHI"
+		self.ver, self.flags, self.id = struct.unpack(hdrfmt,
 				fd.read(struct.calcsize(hdrfmt)))
 
 		# operations (header only)
-		opfmt = "IIQ"
+		opfmt = "!IQ"
 		self.ops = []
-		for i in range(self.numops):
-			tlen, plen, offset = struct.unpack(opfmt,
+		while True:
+			tlen, offset = struct.unpack(opfmt,
 					fd.read(struct.calcsize(opfmt)))
+			if tlen == offset == 0:
+				break
 			payload = fd.read(tlen)
 			assert len(payload) == tlen
-			self.ops.append(attrdict(tlen = tlen, plen = plen,
-				offset = offset, payload = payload))
+			self.ops.append(attrdict(tlen = tlen, offset = offset,
+				payload = payload))
+
+		# trailer
+		trailerfmt = "!II"
+		self.numops, self.checksum = struct.unpack(trailerfmt,
+				fd.read(struct.calcsize(trailerfmt)))
 
 	def save(self):
 		# the lack of integrity checking in this function is
 		# intentional, so we can write broken transactions and see how
 		# jfsck() copes with them
 		fd = open(self.path, 'w')
-		fd.write(struct.pack("III", self.id, self.flags, self.numops))
+		fd.write(struct.pack("!HHI", self.ver, self.flags, self.id))
 		for o in self.ops:
-			fd.write(struct.pack("IIQs", o.tlen, o.plen, o.offset,
-				o.payload))
+			fd.write(struct.pack("!IQ", o.tlen, o.offset,))
+			fd.write(o.payload)
+		fd.write(struct.pack("!IQ", 0, 0))
+		fd.write(struct.pack("!II", self.numops, self.checksum))
 
 	def __repr__(self):
 		return '<TransFile %s: id:%d f:%s n:%d ops:%s>' % \
@@ -202,18 +216,21 @@ class TransFile (object):
 					self.ops)
 
 
-def gen_ret_after(n, notyet, itstime):
-	"""Returns a function that returns value of notyet the first n
-	invocations, and itstime afterwards."""
-	holder = [n]
+def gen_ret_seq(seq):
+	"""Returns a function that each time it is called returns a value of
+	the given sequence, in order. When the sequence is exhausted, returns
+	the last value (or 0 if the sequence was empty)."""
+	it = iter(seq)
+	last = [0]
 	def newf(*args, **kwargs):
-		holder[0] -= 1
-		if holder[0] >= 0:
-			return notyet
-		return itstime
+		try:
+			# the next() builtin works on Python 2.6+ and 3
+			last[0] = next(it)
+		except StopIteration:
+			pass	# exhausted: keep repeating the last value
+		return last[0]
 	return newf
 
-
 def autorun(module, specific_test = None):
 	"Runs all the functions in the given module that begin with 'test'."
 	for name in sorted(dir(module)):
diff --git a/tests/stress/jiostress b/tests/stress/jiostress
index 76c3d6b..1712c04 100755
--- a/tests/stress/jiostress
+++ b/tests/stress/jiostress
@@ -37,9 +37,37 @@ def randfrange(maxend, maxsize):
 	size = random.randint(0, (maxend - 1) - start) % maxsize
 	return start, start + size
 
+def randfloat(min, max):	# uniformly distributed float between min and max
+	return random.uniform(min, max)
+
 class ConsistencyError (Exception):
 	pass
 
+def jfsck(fname):
+	"Like libjio.jfsck(), but a J_ENOJOURNAL error yields { 'total': 0 }."
+	try:
+		return libjio.jfsck(fname)
+	except IOError as e:
+		# any other I/O error is passed on to the caller untouched
+		if e.args[0] != libjio.J_ENOJOURNAL:
+			raise
+		return { 'total': 0 }
+
+def comp_cont(bytes):
+	"'aaaabbcc' -> [ ('a', 4), ('b', 2), ('c', 2) ]; '' -> []"
+	# Each run is recorded as soon as it starts and updated in place, so
+	# the final run is never lost and empty input needs no special case.
+	l = []
+	for b in bytes:
+		if l and l[-1][0] == b:
+			# same value as the current run: extend it
+			l[-1] = (b, l[-1][1] + 1)
+		else:
+			# a new run starts here
+			l.append((b, 1))
+
+	return l
+
 
 #
 # The test itself
@@ -67,7 +95,7 @@ class Stresser:
 		self.jf.truncate(fsize)
 
 		if use_as:
-			self.jf.autosync_start(5, 2 * 1024 * 1024)
+			self.jf.autosync_start(5, 10 * 1024 * 1024)
 
 		# data used for consistency checks
 		self.current_range = (0, 0)
@@ -77,11 +105,20 @@ class Stresser:
 	def pread(self, start, end):
 		ppos = self.f.tell()
 		self.f.seek(start, 0)
-		r = self.f.read(end - start)
+		# a single read() may come up short, so loop until done
+		r, c, total = bytes(), 0, end - start
+		while c < total:
+			n = self.f.read(total - c)
+			if not n:
+				# EOF: empty result, whether str or bytes
+				break
+			c += len(n)
+			r += n
 		self.f.seek(ppos, 0)
+		assert c == end - start
 		return r
 
-	def randwrite(self):
+	def prep_randwrite(self):
 		start, end = randfrange(self.fsize, self.maxoplen)
 
 		# read an extended range so we can check we
@@ -94,18 +131,38 @@ class Stresser:
 		nd = getbytes(end - start)
 		self.new_data = self.prev_data[:start - estart] \
 			+ nd + self.prev_data[- (eend - end):]
+		return nd, start
+
+	def randwrite(self, nd, start):
 		self.jf.pwrite(nd, start)
 		return True
 
 	def randwrite_fork(self):
+		# do the prep before the fork so we can verify() afterwards
+		nd, start = self.prep_randwrite()
+		sys.stdout.flush()
 		pid = os.fork()
 		if pid == 0:
 			# child
 			try:
-				self.randwrite()
-			except IOError:
+				self.fiu_enable()
+				self.randwrite(nd, start)
+				self.fiu_disable()
+			except (IOError, MemoryError):
+				try:
+					self.reopen()
+				except (IOError, MemoryError):
+					pass
+				except:
+					self.fiu_disable()
+					traceback.print_exc()
+				self.fiu_disable()
+				sys.exit(1)
+			except MemoryError:
+				self.fiu_disable()
 				sys.exit(1)
 			except:
+				self.fiu_disable()
 				traceback.print_exc()
 				sys.exit(1)
 			sys.exit(0)
@@ -124,11 +181,18 @@ class Stresser:
 		real_data = self.pread(self.current_range[0],
 				self.current_range[1])
 		if real_data not in (self.prev_data, self.new_data):
+			print('Corruption detected')
+			print('Range:', self.current_range)
+			print('Real:', comp_cont(real_data))
+			print('Prev:', comp_cont(self.prev_data))
+			print('New: ', comp_cont(self.new_data))
+			print()
 			raise ConsistencyError
 
 	def reopen(self):
 		self.jf = None
-		r = libjio.jfsck(self.fname)
+		r = jfsck(self.fname)
+
 		self.verify()
 
 		self.jf = libjio.open(self.fname,
@@ -136,15 +200,30 @@ class Stresser:
 		return r
 
 	def fiu_enable(self):
-		if self.use_fi:
-			fiu.enable_random('jio/*', probability = 0.02)
+		if not self.use_fi:
+			return
+
+		# Probabilities are re-drawn on every call to improve code
+		# coverage; each entry is (failure point, min, max)
+		points = (
+			('jio/*', 0.0005, 0.005),
+			('linux/*', 0.005, 0.03),
+			('posix/*', 0.005, 0.03),
+			('libc/mm/*', 0.003, 0.07),
+			('libc/str/*', 0.005, 0.07),
+		)
+		for name, lo, hi in points:
+			fiu.enable_random(name,
+				probability = randfloat(lo, hi))
 
 	def fiu_disable(self):
 		if self.use_fi:
+			for name in ('libc/mm/*', 'libc/str/*', 'posix/*'):
+				fiu.disable(name)	# incl. libc/str/*, which fiu_enable() sets
 			fiu.disable('jio/*')
+			fiu.disable('linux/*')
 
 	def run(self):
-		self.fiu_enable()
 		nfailures = 0
 		sys.stdout.write("  ")
 		for i in range(1, self.nops + 1):
@@ -159,17 +238,14 @@ class Stresser:
 			if self.use_fi:
 				r = self.randwrite_fork()
 			else:
-				r = self.randwrite()
+				nd, start = self.prep_randwrite()
+				r = self.randwrite(nd, start)
 			if not r:
 				nfailures += 1
-				self.fiu_disable()
 				r = self.reopen()
-				assert r['total'] <= 1
-				self.fiu_enable()
 			self.verify()
 		sys.stdout.write("\n")
 		sys.stdout.flush()
-		self.fiu_disable()
 		return nfailures
 
 
@@ -224,8 +300,7 @@ def main():
 	print("  %d operations" % nops)
 	print("  %d simulated failures" % nfailures)
 
-	r = libjio.jfsck(fname)
-	assert r['total'] == 0
+	r = jfsck(fname)
 	print("Final check completed")
 	#os.unlink(fname)
 

