diff --git a/src/xenia/base/memory.h b/src/xenia/base/memory.h
index 14fb65968..01cf40f87 100644
--- a/src/xenia/base/memory.h
+++ b/src/xenia/base/memory.h
@@ -466,6 +466,48 @@ constexpr inline fourcc_t make_fourcc(const std::string_view fourcc) {
   }
   return make_fourcc(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
 }
+// Fixed-capacity byte buffer over a single AllocFixed region of sz bytes.
+// resize()/clear() only move the logical size: no reallocation, no memset.
+// chrispy::todo: use for command stream vector, resize happens a ton and has
+// to call memset
+template <size_t sz>
+class fixed_vmem_vector {
+  static_assert((sz & 65535) == 0,
+                "Always give fixed_vmem_vector a size divisible by 65536 to "
+                "avoid wasting memory on windows");
+
+  uint8_t* data_;  // base of the region; whatever AllocFixed returned
+  size_t nbytes_;  // logical size in bytes, never more than sz
+
+ public:
+  fixed_vmem_vector()
+      : data_(static_cast<uint8_t*>(
+            AllocFixed(nullptr, sz, AllocationType::kReserveCommit,
+                       PageAccess::kReadWrite))),
+        nbytes_(0) {}
+  // Sole owner of the region: a copy would DeallocFixed the same base twice.
+  fixed_vmem_vector(const fixed_vmem_vector&) = delete;
+  fixed_vmem_vector& operator=(const fixed_vmem_vector&) = delete;
+  ~fixed_vmem_vector() {
+    if (data_) {
+      DeallocFixed(data_, sz, DeallocationType::kRelease);
+    }
+  }
+
+  uint8_t* data() const { return data_; }
+  size_t size() const { return nbytes_; }
+  // Capacity in bytes; always the template argument sz.
+  size_t alloc() const { return sz; }
+
+  // Sets the logical size; existing bytes are left as they are (no zeroing).
+  void resize(size_t newsize) {
+    xenia_assert(newsize <= sz);  // using the whole region is allowed
+    nbytes_ = newsize;
+  }
+  void clear() { resize(0); }  // todo:maybe zero out
+  // Allocates nothing: only checks the request fits the fixed capacity.
+  void reserve(size_t size) { xenia_assert(size <= sz); }
+};
 
 }  // namespace xe
 
diff --git a/src/xenia/base/mutex.h b/src/xenia/base/mutex.h
index e93f71e1b..36351377b 100644
--- a/src/xenia/base/mutex.h
+++ b/src/xenia/base/mutex.h
@@ -12,7 +12,7 @@
 #include <mutex>
 #include "platform.h"
 
-//#define		XE_ENABLE_FAST_WIN32_MUTEX 1
+#define		XE_ENABLE_FAST_WIN32_MUTEX 1
 namespace xe {
 
 #if XE_PLATFORM_WIN32 == 1 && XE_ENABLE_FAST_WIN32_MUTEX==1
diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc
index dfc993dee..4205016cd 100644
--- a/src/xenia/gpu/command_processor.cc
+++ b/src/xenia/gpu/command_processor.cc
@@ -493,13 +493,18 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
   // very unlikely. these ORS here are meant to be bitwise ors, so that we do
   // not do branching evaluation of the conditions (we will almost always take
   // all of the branches)
-  if (XE_UNLIKELY(
-          (index - XE_GPU_REG_SCRATCH_REG0 < 8) |
-          (index == XE_GPU_REG_COHER_STATUS_HOST) |
-          ((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
-           (XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX)))) {
+
+  unsigned expr = (index - XE_GPU_REG_SCRATCH_REG0 < 8) |
+                  (index == XE_GPU_REG_COHER_STATUS_HOST) |
+                  ((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
+                   (XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX));
+  // chrispy: reordered for MSVC branch probability (MSVC assumes the `if` branch is taken and the `else` branch is not)
+  if (XE_LIKELY(expr == 0)) {
+  
+  } else {
     HandleSpecialRegisterWrite(index, value);
   }
+
 }
 
 void CommandProcessor::MakeCoherent() {
diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h
index 412e8833d..c9245773b 100644
--- a/src/xenia/gpu/command_processor.h
+++ b/src/xenia/gpu/command_processor.h
@@ -153,6 +153,7 @@ class CommandProcessor {
   // rarely needed, most register writes have no special logic here
   XE_NOINLINE
   void HandleSpecialRegisterWrite(uint32_t index, uint32_t value);
+  XE_FORCEINLINE
   virtual void WriteRegister(uint32_t index, uint32_t value);
 
   const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table() const {
diff --git a/src/xenia/gpu/d3d12/deferred_command_list.h b/src/xenia/gpu/d3d12/deferred_command_list.h
index 22b4fc5da..a1b063558 100644
--- a/src/xenia/gpu/d3d12/deferred_command_list.h
+++ b/src/xenia/gpu/d3d12/deferred_command_list.h
@@ -30,8 +30,11 @@ class D3D12CommandProcessor;
 
 class DeferredCommandList {
  public:
+  /*
+    chrispy: raised the default from 1_MiB to 4_MiB because m:durandal hits frequent resizes in large open maps.
+  */
   DeferredCommandList(const D3D12CommandProcessor& command_processor,
-                      size_t initial_size_bytes = 1_MiB);
+                      size_t initial_size_bytes = 4_MiB);
 
   void Reset();
   void Execute(ID3D12GraphicsCommandList* command_list,