diff --git a/README b/README
index dd9788d..5af1d22 100644
--- a/README
+++ b/README
@@ -114,21 +114,10 @@ Q. How much slower is the unlimited mode?
 A. It depends on 2 things. First, just how much larger than your ram the file
 is, as the bigger the difference, the slower it will be. The second is how much
 redundant data there is. The more there is, the slower, but ultimately the
-better the compression. Using the example of a 10GB virtual image on a machine
-with 8GB ram, it would allocate about 5.5GB by default, yet is capable of
-allocating all the ram for the 10GB file in -M mode.
-
-Options		Size		Compress	Decompress
--l		1793312108	05m13s		3m12s
--lM		1413268368	04m18s		2m54s
--lU		1413268368	06m05s		2m54s
-
-As you can see, the -U option gives the same compression in this case as the
--M option, and for about 50% more time. The advantage to using -U is that it
-will work even when the size can't be encompassed by -M, but progressively
-slower. Why isn't it on by default? If the compression window is a LOT larger
-than ram, with a lot of redundant information it can be drastically slower. I
-may revisit this possibility in the future if I can make it any faster.
+better the compression. Why isn't it on by default? If the compression window is
+a LOT larger than ram, with a lot of redundant information it can be drastically
+slower. I may revisit this possibility in the future if I can make it any
+faster.
 
 Q. Can I use your tool for even more compression than lzma offers?
 A. Yes, the rzip preparation of files makes them more compressible by every
@@ -256,8 +245,8 @@ possible with the -M option, and going beyond that with the -U option.
 Q. Can I use swapspace as ram for lrzip with a massive window?
 A. It will indirectly do this with -M mode enabled. If you want the windows
 even larger, -U (unlimited) mode will make the compression window as big as
-the file itself no matter how big it is, but it will slow down 100 times
-during the compression phase once it has reached your full ram.
+the file itself no matter how big it is, but it will slow down in proportion
+to how much bigger the file is than your ram.
 
 Q. Why do you nice it to +19 by default? Can I speed up the compression by
 changing the nice value?
diff --git a/TODO b/TODO
index e64cb5c..a45cd75 100644
--- a/TODO
+++ b/TODO
@@ -17,12 +17,10 @@ Get the ASM working on 64bit.
 
 Clean up the config system since it's a mystery to me.
 
-Multi-threading on decompression.
-
 Make stdout work without a temporary file.
 
 Make stdin on decompression work without a temporary file.
 
 Make testing file integrity work without a temporary file.
 
-Stop breaking Darwin builds :P
+Fix darwin build since it doesn't support unnamed semaphores.
diff --git a/configure b/configure
index 0b4bd9d..8e845a6 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.67 for lrzip 0.540.
+# Generated by GNU Autoconf 2.67 for lrzip 0.541.
 #
 # Report bugs to <kernel@kolivas.org>.
 #
@@ -551,9 +551,9 @@ MAKEFLAGS=
 
 # Identity of this package.
 PACKAGE_NAME='lrzip'
-PACKAGE_TARNAME='lrzip-0.540'
-PACKAGE_VERSION='0.540'
-PACKAGE_STRING='lrzip 0.540'
+PACKAGE_TARNAME='lrzip-0.541'
+PACKAGE_VERSION='0.541'
+PACKAGE_STRING='lrzip 0.541'
 PACKAGE_BUGREPORT='kernel@kolivas.org'
 PACKAGE_URL=''
 
@@ -1221,7 +1221,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures lrzip 0.540 to adapt to many kinds of systems.
+\`configure' configures lrzip 0.541 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1269,7 +1269,7 @@ Fine tuning of the installation directories:
   --infodir=DIR           info documentation [DATAROOTDIR/info]
   --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
   --mandir=DIR            man documentation [DATAROOTDIR/man]
-  --docdir=DIR            documentation root [DATAROOTDIR/doc/lrzip-0.540]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/lrzip-0.541]
   --htmldir=DIR           html documentation [DOCDIR]
   --dvidir=DIR            dvi documentation [DOCDIR]
   --pdfdir=DIR            pdf documentation [DOCDIR]
@@ -1286,7 +1286,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of lrzip 0.540:";;
+     short | recursive ) echo "Configuration of lrzip 0.541:";;
    esac
   cat <<\_ACEOF
 
@@ -1375,7 +1375,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-lrzip configure 0.540
+lrzip configure 0.541
 generated by GNU Autoconf 2.67
 
 Copyright (C) 2010 Free Software Foundation, Inc.
@@ -2014,7 +2014,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by lrzip $as_me 0.540, which was
+It was created by lrzip $as_me 0.541, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
 
   $ $0 $@
@@ -5324,7 +5324,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by lrzip $as_me 0.540, which was
+This file was extended by lrzip $as_me 0.541, which was
 generated by GNU Autoconf 2.67.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -5386,7 +5386,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-lrzip config.status 0.540
+lrzip config.status 0.541
 configured by $0, generated by GNU Autoconf 2.67,
   with options \\"\$ac_cs_config\\"
 
diff --git a/configure.ac b/configure.ac
index 3796cb8..41f25ef 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
 dnl Process this file with autoconf to produce a configure script.
-AC_INIT([lrzip],[0.540],[kernel@kolivas.org],[lrzip-0.540])
+AC_INIT([lrzip],[0.541],[kernel@kolivas.org],[lrzip-0.541])
 AC_CONFIG_HEADER(config.h)
 # see what our system is!
 AC_CANONICAL_HOST
diff --git a/rzip.c b/rzip.c
index 44edca8..3db2742 100644
--- a/rzip.c
+++ b/rzip.c
@@ -101,6 +101,11 @@ struct sliding_buffer {
 	int fd;		/* The fd of the mmap */
 } sb;	/* Sliding buffer */
 
+static void round_to_page(i64 *size)
+{
+	*size -= *size % control.page_size; /* Drop the remainder so *size is a multiple of control.page_size */
+}
+
 static void remap_low_sb(void)
 {
 	static int top = 0;
@@ -113,8 +118,8 @@ static void remap_low_sb(void)
 		new_offset = sb.orig_size - sb.size_low;
 		top = 1;
 	}
-	new_offset -= new_offset % control.page_size; /* Round to page size */
-	print_maxverbose("Sliding main buffer\n");
+	round_to_page(&new_offset);
+	print_maxverbose("Sliding main buffer   \n");
 	if (unlikely(munmap(sb.buf_low, sb.size_low)))
 		fatal("Failed to munmap in remap_low_sb\n");
 	sb.offset_low = new_offset;
@@ -592,11 +597,6 @@ static void hash_search(struct rzip_state *st, double pct_base, double pct_multi
 		}
 	}
 
-	/* Fake that we got to 100% since we're done :D */
-	if (!STDIN)
-		print_progress("Total: 100%%  ");
-	print_progress("Chunk: 100%%\n");
-
 	if (MAX_VERBOSE)
 		show_distrib(st);
 
@@ -772,7 +772,7 @@ void rzip_fd(int fd_in, int fd_out)
 
 	init_hash_indexes(st);
 
-	passes = 1 + s.st_size / chunk_window;
+	passes = 0;
 
 	/* set timers and chunk counter */
 	last.tv_sec = last.tv_usec = 0;
@@ -871,6 +871,8 @@ retry:
 		gettimeofday(&current, NULL);
 		/* this will count only when size > window */
 		if (last.tv_sec > 0) {
+			if (!passes)
+				passes = s.st_size / st->chunk_size + 1;
 			elapsed_time = current.tv_sec - start.tv_sec;
 			finish_time = elapsed_time / (pct_base / 100.0);
 			elapsed_hours = (unsigned int)(elapsed_time) / 3600;
@@ -885,13 +887,13 @@ retry:
 					pass, passes, elapsed_hours, elapsed_minutes, elapsed_seconds,
 					eta_hours, eta_minutes, eta_seconds, chunkmbs);
 			else
-				print_verbose("\nPass %d / %d -- Elapsed Time: %02d:%02d:%02d. Compress Speed: %3.3fMB/s.\n",
-					pass, passes, elapsed_hours, elapsed_minutes, elapsed_seconds, chunkmbs);
+				print_verbose("\nPass %d -- Elapsed Time: %02d:%02d:%02d. Compress Speed: %3.3fMB/s.\n",
+					pass, elapsed_hours, elapsed_minutes, elapsed_seconds, chunkmbs);
 		}
 		last.tv_sec = current.tv_sec;
 		last.tv_usec = current.tv_usec;
 		rzip_chunk(st, fd_in, fd_out, offset, pct_base, pct_multiple);
-		/* st->chunk_bytes may be shrunk in rzip_chunk */
+		/* st->chunk_size may be shrunk in rzip_chunk */
 		last_chunk = st->chunk_size;
 		len -= st->chunk_size;
 	}
diff --git a/rzip.h b/rzip.h
index b1b59f4..d648109 100644
--- a/rzip.h
+++ b/rzip.h
@@ -19,7 +19,7 @@
 
 #define LRZIP_MAJOR_VERSION 0
 #define LRZIP_MINOR_VERSION 5
-#define LRZIP_MINOR_SUBVERSION 40
+#define LRZIP_MINOR_SUBVERSION 41
 
 #define NUM_STREAMS 2
 
@@ -121,6 +121,7 @@ extern int errno;
 #define likely(x)	__builtin_expect(!!(x), 1)
 #define unlikely(x)	__builtin_expect(!!(x), 0)
 
+typedef unsigned long long u64;
 typedef long long int i64;
 typedef uint16_t u16;
 typedef uint32_t u32;
diff --git a/stream.c b/stream.c
index d981cb4..309e6ef 100644
--- a/stream.c
+++ b/stream.c
@@ -649,9 +649,9 @@ static pthread_t *threads;
    compression level and algorithm */
 void *open_stream_out(int f, int n, i64 limit)
 {
-	unsigned cwindow = control.window;
 	struct stream_info *sinfo;
 	uchar *testmalloc;
+	unsigned cwindow;
 	int i;
 
 	sinfo = malloc(sizeof(*sinfo));
@@ -690,11 +690,14 @@ void *open_stream_out(int f, int n, i64 limit)
 	sinfo->fd = f;
 
 	if (BITS32) {
-		/* Largest window supported on 32bit is 600MB */
-		if (!cwindow || cwindow > 6)
-			cwindow = 6;
-		control.window = cwindow;
+		/* Largest window we can safely support on 32bit is 2GB */
+		if (!control.window || control.window > 20)
+			control.window = 20;
+		/* Largest window supported by lzma is 300MB */
+		if (LZMA_COMPRESS && control.window > 3)
+			control.window = 3;
 	}
+	cwindow = control.window;
 
 	/* No point making the stream larger than the amount of data */
 	if (cwindow)