diff --git a/Cargo.lock b/Cargo.lock index 671f204..98e459b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfgenius" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae118db0db0e2137671e9b916de8af5d3be65a5fb1ad3a9c7925c38b6644cfb" + [[package]] name = "clap" version = "4.5.28" @@ -829,6 +835,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1223,6 +1235,7 @@ dependencies = [ "atomic", "bytemuck", "cfg-if", + "cfgenius", "clap", "easy-bitfield", "env_logger", @@ -1231,6 +1244,7 @@ dependencies = [ "log", "mmtk", "parking_lot", + "paste", "rand", "sysinfo 0.33.1", "vmkit-proc", diff --git a/vmkit-proc/src/bytecode.rs b/vmkit-proc/src/bytecode.rs new file mode 100644 index 0000000..49e6999 --- /dev/null +++ b/vmkit-proc/src/bytecode.rs @@ -0,0 +1,6 @@ +//! Bytecode generation DSL +//! +//! This module provides a DSL for generating bytecode instructions. + +pub mod decl; +pub mod fits; \ No newline at end of file diff --git a/vmkit-proc/src/bytecode/decl.rs b/vmkit-proc/src/bytecode/decl.rs new file mode 100644 index 0000000..4090f4a --- /dev/null +++ b/vmkit-proc/src/bytecode/decl.rs @@ -0,0 +1,62 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::fits; + + + +/// A single opcode argument +pub struct Argument { + pub name: syn::Ident, + pub index: usize, + pub optional: bool, + pub ty: syn::Type +} + +impl Argument { + pub fn new(name: syn::Ident, index: usize, optional: bool, ty: syn::Type) -> Self { + Self { + name, + index, + optional, + ty + } + } + + pub fn field(&self) -> TokenStream { + let name = &self.name; + let ty = &self.ty; + quote! { + #name: #ty + } + } + + pub fn create_param(&self) -> TokenStream { + let name = &self.name; + let ty = &self.ty; + if self.optional { + quote! { + #name: Option<#ty> + } + } else { + quote! { + #name: #ty + } + } + } + + pub fn field_name(&self) -> TokenStream { + let name = &self.name; + quote! { + #name + } + } + + pub fn fits_check(&self, size: usize) -> TokenStream { + fits::check(size, self.name.clone(), self.ty.clone()) + } + + pub fn fits_write(&self, size: usize) -> TokenStream { + fits::write(size, self.name.clone(), self.ty.clone()) + } +} \ No newline at end of file diff --git a/vmkit-proc/src/bytecode/fits.rs b/vmkit-proc/src/bytecode/fits.rs new file mode 100644 index 0000000..6020fcc --- /dev/null +++ b/vmkit-proc/src/bytecode/fits.rs @@ -0,0 +1,25 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::Ident; + +pub fn convert(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + quote! { + Fits::<#typ, #sz>::convert(#name) + } +} + +pub fn check(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + quote! { + Fits::<#typ, #sz>::check(#name) + } +} + +pub fn write(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + let cvt = convert(size, name, typ); + quote! 
{
+        generator.write(#cvt)
+    }
+}
\ No newline at end of file
diff --git a/vmkit-proc/src/lib.rs b/vmkit-proc/src/lib.rs
index d96bb66..e031634 100644
--- a/vmkit-proc/src/lib.rs
+++ b/vmkit-proc/src/lib.rs
@@ -2,12 +2,9 @@
 extern crate proc_macro;
 
-use std::collections::HashSet;
-
-use proc_macro::TokenStream;
-use proc_macro2::{extra, Span};
+use proc_macro2::Span;
 use quote::quote;
-use syn::{parse_macro_input, Arm, Data, DeriveInput};
+use std::collections::HashSet;
 use synstructure::{decl_derive, AddBounds};
 
 struct ArrayLike {
@@ -15,6 +12,7 @@ struct ArrayLike {
     /// Getter for length field.
     pub length_getter: Option<syn::Ident>,
     /// Setter for length field.
+    #[allow(dead_code)]
     pub length_setter: Option<syn::Ident>,
     pub element_type: syn::Type,
     pub data_field: Option<syn::Ident>,
@@ -155,6 +153,7 @@ fn find_arraylike(attrs: &[syn::Attribute]) -> syn::Result<Option<ArrayLike>> {
 enum ObjectAlignment {
     Auto,
     Const(syn::Expr),
+    #[allow(dead_code)]
     Compute(syn::Expr),
 }
 
@@ -331,7 +330,7 @@ pub fn derive_gcmetadata(input: TokenStream) -> TokenStream {
 
 decl_derive!([GCMetadata,attributes(gcmetadata, arraylike, ignore_trace)] => derive_gcmetadata);
 
-fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenStream {
+fn derive_gcmetadata(s: synstructure::Structure<'_>) -> proc_macro2::TokenStream {
     let gc_metadata = match find_gcmetadata(&s.ast().attrs) {
         Ok(gc) => gc,
         Err(err) => return err.to_compile_error(),
@@ -372,7 +371,6 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
 
     let trace_impl = if scan_slots {
         let trace_body = filtered.each(|bi| {
-
             quote! {
                 mark(#bi, visitor);
             }
@@ -484,40 +482,48 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
             )
         };
 
-        (Some(trace_impl), if scan_slots {
-            let full_path = quote! { <#vm as vmkit::VirtualMachine>::Slot };
-            quote! { ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
-                let object = object.as_address().as_ref::<#name>();
-                ::vmkit::prelude::Scan::<#full_path>::scan_object(object, visitor);
-            }) }
-        } else {
-            quote! { ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
-                let object = object.as_address().as_mut_ref::<#name>();
-                ::vmkit::prelude::Trace::trace_object(object, visitor);
-            }) }
-        })
+        (
+            Some(trace_impl),
+            if scan_slots {
+                let full_path = quote! { <#vm as vmkit::VirtualMachine>::Slot };
+                quote! { ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
+                    let object = object.as_address().as_ref::<#name>();
+                    ::vmkit::prelude::Scan::<#full_path>::scan_object(object, visitor);
+                }) }
+            } else {
+                quote! { ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
+                    let object = object.as_address().as_mut_ref::<#name>();
+                    ::vmkit::prelude::Trace::trace_object(object, visitor);
+                }) }
+            },
+        )
     }
     ObjectTrace::Trace(scan_slots, expr) => {
        if scan_slots {
-            (None, quote! {
-                ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
-                    let this = object.as_address().as_ref::<#name>();
-                    #expr
-                })
-            })
+            (
+                None,
+                quote! {
+                    ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
+                        let this = object.as_address().as_ref::<#name>();
+                        #expr
+                    })
+                },
+            )
        } else {
-            (None, quote! {
-                ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
-                    let this = object.as_address().as_mut_ref::<#name>();
-                    #expr
-                })
-            })
+            (
+                None,
+                quote! {
+                    ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
+                        let this = object.as_address().as_mut_ref::<#name>();
+                        #expr
+                    })
+                },
+            )
        }
     }
-    ,
     };
-
+
     let instance_size = match &gc_metadata.size {
         ObjectSize::Auto if arraylike.is_some() => quote! { 0 },
         ObjectSize::Size(_) if arraylike.is_some() => quote! { 0 },
@@ -547,12 +553,11 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
             panic!("length field not found");
         };
 
-        let element = &arraylike.element_type;
         quote! {
             Some(|object| unsafe {
                 let this = object.as_address().as_ref::<#name>();
                 let length = #length;
-
+
                 size_of::<#name>() + (length * ::std::mem::size_of::<#element>())
             })
         }
@@ -589,3 +594,5 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
 
     output.into()
 }
+
+mod bytecode;
\ No newline at end of file
diff --git a/vmkit/Cargo.toml b/vmkit/Cargo.toml
index ee201d8..baf917a 100644
--- a/vmkit/Cargo.toml
+++ b/vmkit/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 atomic = "0.6.0"
 bytemuck = "1.21.0"
 cfg-if = "1.0.0"
+cfgenius = "0.1.1"
 clap = { version = "4.5.28", features = ["derive"] }
 easy-bitfield = "0.1.0"
 env_logger = "0.11.6"
@@ -15,6 +16,7 @@ libc = "0.2.169"
 log = { version = "0.4.25" }
 mmtk = { git = "https://github.com/mmtk/mmtk-core" }
 parking_lot = "0.12.3"
+paste = "1.0.15"
 rand = "0.9.0"
 sysinfo = "0.33.1"
 vmkit-proc = { path = "../vmkit-proc", optional = true }
diff --git a/vmkit/examples/binarytrees-mt.c b/vmkit/examples/binarytrees-mt.c
new file mode 100644
index 0000000..72fe58b
--- /dev/null
+++ b/vmkit/examples/binarytrees-mt.c
@@ -0,0 +1,104 @@
+#define GC_THREADS 1
+#define GC_NO_THREAD_REDIRECTS 1
+#include <gc.h>
+
+#include <stdio.h>
+#include <pthread.h>
+#include <time.h>
+
+
+typedef struct Node {
+    struct Node *left;
+    struct Node *right;
+} Node;
+
+Node* leaf() {
+    return GC_malloc(sizeof(Node));
+}
+
+Node* new_node(Node* left, Node* right) {
+    Node* node = GC_malloc(sizeof(Node));
+    node->left = left;
+    node->right = right;
+    return node;
+}
+
+int itemCheck(Node* node) {
+    if (node->left == NULL) {
+        return 1;
+    }
+    return 1 + itemCheck(node->left) + itemCheck(node->right);
+}
+
+Node* bottomUpTree(int depth) {
+    if (depth > 0) {
+        return new_node(bottomUpTree(depth - 1), bottomUpTree(depth - 1));
+    }
+    return leaf();
+}
+
+extern void* __data_start;
+extern void* _end;
+
+Node* longLivedTree;
+char results[16][256];
+void* threadWork(void* data) {
+    struct GC_stack_base base;
+    GC_get_stack_base(&base);
+    GC_register_my_thread(&base);
+    int d = (int)(size_t)data;
+    int iterations = 1 << (21 - d + 4);
+    int check = 0;
+    for (int i = 0; i < iterations; i++) {
+        Node* treeNode = bottomUpTree(d);
+        check += itemCheck(treeNode);
+    }
+    //results[(d-4)/2] =
+
+    sprintf(&results[(d-4)/2][0], "%d\t trees of depth %d\t check: %d\n", iterations, d, check);
+    GC_unregister_my_thread();
+    return NULL;
+}
+
+int main() {
+    printf("DATA START: %p\n", &__data_start);
+    printf("DATA END: %p\n", &_end);
+    GC_use_entire_heap = 1;
+    GC_allow_register_threads();
+    GC_init();
+
+
+    int maxDepth = 21;
+    int stretchDepth = maxDepth + 1;
+    int start = clock();
+    Node* stretchTree = bottomUpTree(stretchDepth);
+    printf("stretch tree of depth %d\n", stretchDepth);
+    printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC);
+
+    longLivedTree = bottomUpTree(maxDepth);
+    GC_gcollect();
+
+    pthread_t threads[16];
+
+    printf("long lived tree of depth %d\t check: %d\n", maxDepth, itemCheck(longLivedTree));
+    for (int d = 4; d <= maxDepth; d += 2) {
+        /*int iterations = 1 << (maxDepth - d + 4);
+        int check = 0;
+        for (int i = 0; i < iterations; i++) {
+            Node* treeNode = bottomUpTree(d);
+            check += itemCheck(treeNode);
+        }
+        printf("%d\t trees of depth %d\t check: %d\n", iterations, d, check);*/
+        void* data = (void*)(size_t)d;
+        pthread_create(&threads[(d-4)/2], NULL, threadWork, data);
+    }
+
+    for (int d = 4; d <= maxDepth; d += 2) {
+        pthread_join(threads[(d-4)/2], NULL);
+        printf("%s", results[(d-4)/2]);
+    }
+
+    printf("long lived tree of depth %d\t check: %d\n", maxDepth, itemCheck(longLivedTree));
+    printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC);
+
+    return 0;
+}
diff --git a/vmkit/examples/binarytrees.c b/vmkit/examples/binarytrees.c
index bfb30a5..914b348 100644
--- a/vmkit/examples/binarytrees.c
+++ b/vmkit/examples/binarytrees.c
@@ -46,7 +46,7 @@ int main() {
     GC_init();
 
 
-    int maxDepth = 18;
+    int maxDepth = 21;
     int stretchDepth = maxDepth + 1;
     int start = clock();
     Node* stretchTree = bottomUpTree(stretchDepth);
diff --git a/vmkit/src/bdwgc_shim.rs b/vmkit/src/bdwgc_shim.rs
index 7bd4c97..4e30547 100644
--- a/vmkit/src/bdwgc_shim.rs
+++ b/vmkit/src/bdwgc_shim.rs
@@ -253,6 +253,10 @@ impl VirtualMachine for BDWGC {
             }
         }
     }
+
+    fn compute_hashcode(object: VMKitObject) -> usize {
+        object.as_address().as_usize()
+    }
 }
 
 type VTableAddress = BitField;
@@ -605,6 +609,11 @@ pub extern "C-unwind" fn GC_unregister_my_thread() {
     unsafe { Thread::<BDWGC>::unregister_mutator_manual() };
 }
 
+#[no_mangle]
+pub extern "C-unwind" fn GC_allow_register_threads() {
+    /* noop: always allowed */
+}
+
 #[no_mangle]
 pub extern "C-unwind" fn GC_pthread_create(
     thread_ptr: &mut libc::pthread_t,
diff --git a/vmkit/src/lib.rs b/vmkit/src/lib.rs
index 07a9756..dbca05c 100644
--- a/vmkit/src/lib.rs
+++ b/vmkit/src/lib.rs
@@ -13,10 +13,9 @@ pub mod mm;
 pub mod object_model;
 pub mod options;
 pub mod platform;
-pub mod semaphore;
 pub mod sync;
 pub mod threading;
-
+pub mod macros;
 #[cfg(feature = "uncooperative")]
 pub mod bdwgc_shim;
@@ -219,8 +218,8 @@ impl VMKit<VM> {
 #[cfg(feature="derive")]
 pub use vmkit_proc::GCMetadata;
 
-
 pub mod prelude {
+    #[cfg(feature="derive")]
     pub use super::GCMetadata;
     pub use super::mm::traits::*;
     pub use super::object_model::object::*;
diff --git a/vmkit/src/machine_context.rs b/vmkit/src/machine_context.rs
index 139597f..e15d53b 100644
--- a/vmkit/src/machine_context.rs
+++ b/vmkit/src/machine_context.rs
@@ -1,2 +1,62 @@
+cfgenius::cond! {
+
+    if macro(crate::macros::darwin) {
+        cfgenius::cond! {
+            if cfg(target_arch="x86_64") {
+                pub type PlatformRegisters = libc::__darwin_x86_thread_state64;
+            } else if cfg(target_arch="aarch64") {
+                pub type PlatformRegisters = libc::__darwin_arm_thread_state64;
+            } else {
+                compile_error!("Unsupported Apple target");
+            }
+        }
+
+        pub unsafe fn registers_from_ucontext(ucontext: *const libc::ucontext_t) -> *const PlatformRegisters {
+            return &(*ucontext).uc_mcontext.__ss;
+        }
+    } else if macro(crate::macros::have_machine_context) {
+
+        #[cfg(not(target_os="openbsd"))]
+        use libc::mcontext_t;
+        #[cfg(target_os="openbsd")]
+        use libc::ucontext_t as mcontext_t;
+
+        #[repr(C)]
+        #[derive(Clone)]
+        pub struct PlatformRegisters {
+            pub machine_context: mcontext_t
+        }
+
+        pub unsafe fn registers_from_ucontext(ucontext: *const libc::ucontext_t) -> *const PlatformRegisters {
+            cfgenius::cond! {
+                if cfg(target_os="openbsd")
+                {
+                    return ucontext.cast();
+                }
+                else if cfg(target_arch="powerpc")
+                {
+                    return unsafe { std::mem::transmute(
+                        &(*ucontext).uc_mcontext.uc_regs
+                    ) }
+                } else {
+                    return unsafe { std::mem::transmute(
+                        &(*ucontext).uc_mcontext
+                    ) }
+                }
+
+            }
+        }
+
+
+    } else if cfg(windows) {
+        use winapi::um::winnt::CONTEXT;
+
+        pub type PlatformRegisters = CONTEXT;
+    } else {
+        pub struct PlatformRegisters {
+            pub stack_pointer: *mut u8
+        }
+    }
+}
diff --git a/vmkit/src/macros.rs b/vmkit/src/macros.rs
new file mode 100644
index 0000000..634491e
--- /dev/null
+++ b/vmkit/src/macros.rs
@@ -0,0 +1,27 @@
+cfgenius::define! {
+    pub darwin = cfg(target_vendor="apple");
+    pub ios_family = all(macro(darwin), cfg(target_os="ios"));
+    pub have_machine_context = any(
+        macro(darwin),
+        cfg(target_os="fuchsia"),
+        all(
+            cfg(any(
+                target_os="freebsd",
+                target_os="haiku",
+                target_os="netbsd",
+                target_os="openbsd",
+                target_os="linux",
+                target_os="hurd"
+            )),
+            cfg(
+                any(
+                    target_arch="x86_64",
+                    target_arch="arm",
+                    target_arch="aarch64",
+                    target_arch="riscv64",
+                )
+            )
+        ));
+
+}
diff --git a/vmkit/src/main.rs b/vmkit/src/main.rs
index b86d3d8..ef6b1d0 100644
--- a/vmkit/src/main.rs
+++ b/vmkit/src/main.rs
@@ -1,7 +1,6 @@
 use mmtk::util::Address;
 use mmtk::vm::slot::UnimplementedMemorySlice;
 use mmtk::{util::options::PlanSelector, vm::slot::SimpleSlot, AllocationSemantics, MMTKBuilder};
-use vmkit::GCMetadata;
 use std::cell::RefCell;
 use std::mem::offset_of;
 use std::sync::Arc;
@@ -182,16 +181,7 @@ fn bottom_up_tree(thread: &Thread<BenchVM>, depth: usize) -> NodeRef {
 
 const MIN_DEPTH: usize = 4;
 
-#[derive(GCMetadata)]
-#[gcmetadata(
-    vm = BenchVM,
-    scan
-)]
-enum Object {
-
-    Fixnum(#[ignore_trace] i32),
-    Boxed(VMKitObject)
-}
+
 
@@ -210,7 +200,7 @@ fn main() {
         .unwrap_or_else(|_| panic!());
 
     Thread::<BenchVM>::main(ThreadBenchContext, || {
-        let thread = Thread::<BenchVM>::current();
+        /*let thread = Thread::<BenchVM>::current();
         let start = std::time::Instant::now();
         let n = std::env::var("DEPTH")
             .unwrap_or("18".to_string())
@@ -271,6 +261,17 @@ fn main() {
         );
 
         let duration = start.elapsed();
-        println!("time: {duration:?}");
+        println!("time: {duration:?}");*/
+
+
+        let thread = Thread::<BenchVM>::current();
+
+        thread.save_registers();
+
+        let registers = thread.get_registers();
+
+        for (i, greg) in registers.machine_context.gregs.iter().enumerate() {
+            println!("{:02}: {:x}", i, greg);
+        }
     });
 }
diff --git a/vmkit/src/mm.rs b/vmkit/src/mm.rs
index 67c8faf..177fbf8 100644
--- a/vmkit/src/mm.rs
+++ b/vmkit/src/mm.rs
@@ -146,6 +146,7 @@ impl<VM: VirtualMachine> MemoryManager<VM> {
         object_start.store(HeapObjectHeader::<VM>::new(metadata));
         let object = VMKitObject::from_address(object_start + OBJECT_REF_OFFSET);
         Self::set_vo_bit(object);
+        debug_assert!(mmtk::memory_manager::is_mapped_address(object.as_address()));
         return object;
     }
@@ -272,6 +273,7 @@ impl<VM: VirtualMachine> MemoryManager<VM> {
         let object = VMKitObject::from_address(object_start + OBJECT_REF_OFFSET);
         Self::set_vo_bit(object);
         Self::refill_tlab(thread);
+        debug_assert!(mmtk::memory_manager::is_mapped_address(object.as_address()));
         object
     }
 }
diff --git a/vmkit/src/mm/scanning.rs b/vmkit/src/mm/scanning.rs
index c5739ef..3feb4e3 100644
--- a/vmkit/src/mm/scanning.rs
+++ b/vmkit/src/mm/scanning.rs
@@ -1,17 +1,13 @@
 use std::marker::PhantomData;
 
 use crate::{
-    mm::MemoryManager,
-    object_model::{
+    machine_context::PlatformRegisters, mm::MemoryManager, object_model::{
         metadata::{Metadata, TraceCallback},
         object::VMKitObject,
-    },
-    threading::{Thread, ThreadContext},
-    VirtualMachine,
+    }, threading::{Thread, ThreadContext}, VirtualMachine
 };
 
 use mmtk::{
-    vm::{slot::Slot, ObjectTracer, Scanning, SlotVisitor},
-    MutatorContext,
+    util::Address, vm::{slot::Slot, ObjectTracer, Scanning, SlotVisitor}, MutatorContext
 };
 
 use super::traits::ToSlot;
@@ -106,9 +102,17 @@ impl<VM: VirtualMachine> Scanning<MemoryManager<VM>> for VMKitScanning<VM> {
         use super::conservative_roots::ConservativeRoots;
         let mut croots = ConservativeRoots::new(128);
         let bounds = *tls.stack_bounds();
+        let registers = tls.get_registers();
+
+        unsafe {
+            let start = Address::from_ref(&registers);
+            let end = start.add(size_of::<PlatformRegisters>());
+            croots.add_span(start, end);
+        }
         unsafe { croots.add_span(bounds.origin(), tls.stack_pointer()) };
         tls.context.scan_conservative_roots(&mut croots);
         croots.add_to_factory(&mut factory);
+        drop(registers);
     }
 }
diff --git a/vmkit/src/object_model/compression.rs b/vmkit/src/object_model/compression.rs
index 878230b..104455d 100644
--- a/vmkit/src/object_model/compression.rs
+++ b/vmkit/src/object_model/compression.rs
@@ -47,6 +47,10 @@ static COMPRESSED_OPS: CompressedOpsStorage =
         range: (Address::ZERO, Address::ZERO),
     }));
 
+
+pub const MY_RANDOM_ADDR: usize = 0x500_000_000 + 6 * 1024 * 1024 * 1024;
+pub const COMPRESSED: usize = (MY_RANDOM_ADDR - 0x500_000_000) >> 3;
+
 impl CompressedOps {
     /// Initialize compressed object pointers.
     ///
diff --git a/vmkit/src/object_model/header.rs b/vmkit/src/object_model/header.rs
index 37cf2d1..d69038a 100644
--- a/vmkit/src/object_model/header.rs
+++ b/vmkit/src/object_model/header.rs
@@ -11,7 +11,7 @@ pub const OBJECT_REF_OFFSET: isize = 8;
 pub const OBJECT_HEADER_OFFSET: isize = -OBJECT_REF_OFFSET;
 pub const HASHCODE_OFFSET: isize = -(OBJECT_REF_OFFSET + size_of::() as isize);
 
-pub const METADATA_BIT_LIMIT: usize = if ADDRESS_BASED_HASHING { usize::BITS as usize - 2 } else { usize::BITS as usize };
+pub const METADATA_BIT_LIMIT: usize = if ADDRESS_BASED_HASHING { usize::BITS as usize - 2 } else { usize::BITS as usize - 1 };
 
 pub type MetadataField = BitField;
 pub type HashStateField = BitField;
diff --git a/vmkit/src/sync.rs b/vmkit/src/sync.rs
index e10bf17..b1242bb 100644
--- a/vmkit/src/sync.rs
+++ b/vmkit/src/sync.rs
@@ -1,233 +1,17 @@
-use std::{
-    mem::ManuallyDrop,
-    num::NonZeroU64,
-    ops::Deref,
-    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
-    time::Duration,
-};
+//! Synchronization primitives for VMKit.
+//!
+//! Provides synchronization primitives which are friendly to our thread system. Most of the types
+//! provide `*_with_handshake` and `*_no_handshake` methods. The difference is that the former
+//! notify the scheduler that the thread is blocked, while the latter do not. Most of the time
+//! your code should use the `*_with_handshake` methods, so that GC or other tasks can be
+//! scheduled while the thread is blocked. The `*_no_handshake` methods should be used when the
+//! thread is accessing GC data structures, or when the thread is not allowed to be blocked.
+pub mod semaphore;
+pub mod monitor;
 
-use parking_lot::{Condvar, Mutex, MutexGuard, WaitTimeoutResult};
+pub use monitor::*;
+pub use super::threading::parked_scope;
\ No newline at end of file
 
-use crate::{
-    threading::{parked_scope, Thread, ThreadContext},
-    VirtualMachine,
-};
-
-fn get_thread_id() -> NonZeroU64 {
-    thread_local! {
-        static KEY: u64 = 0;
-    }
-    KEY.with(|x| {
-        NonZeroU64::new(x as *const _ as u64).expect("thread-local variable address is null")
-    })
-}
-
-/// Implementation of a heavy lock and condition variable implemented using
-/// the primitives available from the `parking_lot`. Currently we use
-/// a `Mutex` and `Condvar`.
-///
-/// It is perfectly safe to use this throughout the VM for locking. It is
-/// meant to provide roughly the same functionality as ReentrantMutex combined with Condvar,
-/// except:
-///
-/// - This struct provides a faster slow path than ReentrantMutex.
-/// - This struct provides a slower fast path than ReentrantMutex.
-/// - This struct will work in the inner guts of the VM runtime because
-///   it gives you the ability to lock and unlock, as well as wait and
-///   notify, without using any other VM runtime functionality.
-/// - This struct allows you to optionally block without letting the thread
-///   system know that you are blocked. The benefit is that you can
-///   perform synchronization without depending on VM thread subsystem functionality.
-///   However, most of the time, you should use the methods that inform
-///   the thread system that you are blocking. Methods that have the
-///   `with_handshake` suffix will inform the thread system if you are blocked,
-///   while methods that do not have the suffix will either not block
-///   (as is the case with `unlock()` and `broadcast()`)
-///   or will block without letting anyone know (like `lock_no_handshake()`
-///   and `wait_no_handshake()`). Not letting the threading
-///   system know that you are blocked may cause things like GC to stall
-///   until you unblock.
-/// - This struct does not provide mutable access to the protected data, as that is
-///   unsound; instead use `RefCell` to mutate the protected data.
-pub struct Monitor<T> {
-    mutex: Mutex<T>,
-    cvar: Condvar,
-    rec_count: AtomicUsize,
-    holder: AtomicU64,
-}
-
-impl<T> Monitor<T> {
-    pub const fn new(value: T) -> Self {
-        Self {
-            mutex: Mutex::new(value),
-            cvar: Condvar::new(),
-            rec_count: AtomicUsize::new(0),
-            holder: AtomicU64::new(0),
-        }
-    }
-
-    pub fn lock_no_handshake(&self) -> MonitorGuard<T> {
-        let my_slot = get_thread_id().get();
-        let guard = if self.holder.load(Ordering::Relaxed) != my_slot {
-            let guard = self.mutex.lock();
-            self.holder.store(my_slot, Ordering::Release);
-            MonitorGuard {
-                monitor: self,
-                guard: ManuallyDrop::new(guard),
-            }
-        } else {
-            MonitorGuard {
-                monitor: self,
-                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
-            }
-        };
-        self.rec_count.fetch_add(1, Ordering::Relaxed);
-        guard
-    }
-
-    pub fn lock_with_handshake<VM: VirtualMachine>(&self) -> MonitorGuard<T> {
-        let my_slot = get_thread_id().get();
-        let guard = if my_slot != self.holder.load(Ordering::Relaxed) {
-            let guard = self.lock_with_handshake_no_rec::<VM>();
-            self.holder.store(my_slot, Ordering::Release);
-            guard
-        } else {
-            MonitorGuard {
-                monitor: self,
-                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
-            }
-        };
-        self.rec_count.fetch_add(1, Ordering::Relaxed);
-        guard
-    }
-
-    fn lock_with_handshake_no_rec<VM: VirtualMachine>(&self) -> MonitorGuard<'_, T> {
-        let tls = Thread::<VM>::current();
-        tls.context.save_thread_state();
-
-        let mutex_guard = loop {
-            Thread::<VM>::enter_native();
-            let guard = self.mutex.lock();
-            if Thread::<VM>::attempt_leave_native_no_block() {
-                break guard;
-            } else {
-                drop(guard);
-                Thread::<VM>::leave_native();
-            }
-        };
-
-        MonitorGuard {
-            monitor: self,
-            guard: ManuallyDrop::new(mutex_guard),
-        }
-    }
-
-    pub fn notify(&self) {
-        self.cvar.notify_one();
-    }
-
-    pub fn notify_all(&self) {
-        self.cvar.notify_all();
-    }
-
-    pub unsafe fn relock_with_handshake<VM: VirtualMachine>(
-        &self,
-        rec_count: usize,
-    ) -> MonitorGuard<'_, T> {
-        let thread = Thread::<VM>::current();
-        thread.context.save_thread_state();
-        let guard = loop {
-            Thread::<VM>::enter_native();
-            let lock = self.mutex.lock();
-            if Thread::<VM>::attempt_leave_native_no_block() {
-                break lock;
-            } else {
-                drop(lock);
-                Thread::<VM>::leave_native();
-            }
-        };
-
-        self.holder.store(get_thread_id().get(), Ordering::Relaxed);
-        self.rec_count.store(rec_count, Ordering::Relaxed);
-        MonitorGuard {
-            monitor: self,
-            guard: ManuallyDrop::new(guard),
-        }
-    }
-}
-
-pub struct MonitorGuard<'a, T> {
-    monitor: &'a Monitor<T>,
-    guard: ManuallyDrop<MutexGuard<'a, T>>,
-}
-
-impl<T> Deref for MonitorGuard<'_, T> {
-    type Target = T;
-
-    fn deref(&self) -> &Self::Target {
-        &self.guard
-    }
-}
-
-impl<'a, T> MonitorGuard<'a, T> {
-    pub fn wait_no_handshake(&mut self) {
-        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
-        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
-        self.monitor.cvar.wait(&mut self.guard);
-        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
-        self.monitor.holder.store(holder, Ordering::Relaxed);
-    }
-
-    pub fn wait_for_no_handshake(&mut self, timeout: Duration) -> WaitTimeoutResult {
-        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
-        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
-        let result = self.monitor.cvar.wait_for(&mut self.guard, timeout);
-        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
-        self.monitor.holder.store(holder, Ordering::Relaxed);
-        result
-    }
-
-    pub fn notify(&self) {
-        self.monitor.cvar.notify_one();
-    }
-
-    pub fn notify_all(&self) {
-        self.monitor.cvar.notify_all();
-    }
-
-    pub fn monitor(&self) -> &Monitor<T> {
-        self.monitor
-    }
-
-    pub unsafe fn unlock_completely(&mut self) -> usize {
-        let result = self.monitor.rec_count.load(Ordering::Relaxed);
-        self.monitor.rec_count.store(0, Ordering::Relaxed);
-        self.monitor.holder.store(0, Ordering::Relaxed);
-        unsafe {
-            ManuallyDrop::drop(&mut self.guard);
-        }
-        result
-    }
-
-    pub fn wait_with_handshake<VM: VirtualMachine>(mut self) -> Self {
-        let t = Thread::<VM>::current();
-        t.context.save_thread_state();
-        let rec_count = parked_scope::<usize, VM>(|| {
-            self.wait_no_handshake();
-            let rec_count = unsafe { self.unlock_completely() };
-            rec_count
-        });
-        unsafe { self.monitor.relock_with_handshake::<VM>(rec_count) }
-    }
-}
-
-impl<'a, T> Drop for MonitorGuard<'a, T> {
-    fn drop(&mut self) {
-        if self.monitor.rec_count.fetch_sub(1, Ordering::Relaxed) == 1 {
-            self.monitor.holder.store(0, Ordering::Relaxed);
-            unsafe { ManuallyDrop::drop(&mut self.guard) };
-        }
-    }
-}
diff --git a/vmkit/src/sync/monitor.rs b/vmkit/src/sync/monitor.rs
new file mode 100644
index 0000000..75ddbfa
--- /dev/null
+++ b/vmkit/src/sync/monitor.rs
@@ -0,0 +1,234 @@
+
+use std::{
+    mem::ManuallyDrop,
+    num::NonZeroU64,
+    ops::Deref,
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
+    time::Duration,
+};
+
+use parking_lot::{Condvar, Mutex, MutexGuard, WaitTimeoutResult};
+
+use crate::{
+    threading::{parked_scope, Thread, ThreadContext},
+    VirtualMachine,
+};
+
+fn get_thread_id() -> NonZeroU64 {
+    thread_local! {
+        static KEY: u64 = 0;
+    }
+    KEY.with(|x| {
+        NonZeroU64::new(x as *const _ as u64).expect("thread-local variable address is null")
+    })
+}
+
+/// Implementation of a heavy lock and condition variable implemented using
+/// the primitives available from the `parking_lot`. Currently we use
+/// a `Mutex` and `Condvar`.
+///
+/// It is perfectly safe to use this throughout the VM for locking. It is
+/// meant to provide roughly the same functionality as ReentrantMutex combined with Condvar,
+/// except:
+///
+/// - This struct provides a faster slow path than ReentrantMutex.
+/// - This struct provides a slower fast path than ReentrantMutex.
+/// - This struct will work in the inner guts of the VM runtime because
+///   it gives you the ability to lock and unlock, as well as wait and
+///   notify, without using any other VM runtime functionality.
+/// - This struct allows you to optionally block without letting the thread
+///   system know that you are blocked. The benefit is that you can
+///   perform synchronization without depending on VM thread subsystem functionality.
+///   However, most of the time, you should use the methods that inform
+///   the thread system that you are blocking. Methods that have the
+///   `with_handshake` suffix will inform the thread system if you are blocked,
+///   while methods that do not have the suffix will either not block
+///   (as is the case with `unlock()` and `broadcast()`)
+///   or will block without letting anyone know (like `lock_no_handshake()`
+///   and `wait_no_handshake()`). Not letting the threading
+///   system know that you are blocked may cause things like GC to stall
+///   until you unblock.
+/// - This struct does not provide mutable access to the protected data, as that is
+///   unsound; instead use `RefCell` to mutate the protected data.
+pub struct Monitor<T> {
+    mutex: Mutex<T>,
+    cvar: Condvar,
+    rec_count: AtomicUsize,
+    holder: AtomicU64,
+}
+
+impl<T> Monitor<T> {
+    pub const fn new(value: T) -> Self {
+        Self {
+            mutex: Mutex::new(value),
+            cvar: Condvar::new(),
+            rec_count: AtomicUsize::new(0),
+            holder: AtomicU64::new(0),
+        }
+    }
+
+    pub fn lock_no_handshake(&self) -> MonitorGuard<T> {
+        let my_slot = get_thread_id().get();
+        let guard = if self.holder.load(Ordering::Relaxed) != my_slot {
+            let guard = self.mutex.lock();
+            self.holder.store(my_slot, Ordering::Release);
+            MonitorGuard {
+                monitor: self,
+                guard: ManuallyDrop::new(guard),
+            }
+        } else {
+            MonitorGuard {
+                monitor: self,
+                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
+            }
+        };
+        self.rec_count.fetch_add(1, Ordering::Relaxed);
+        guard
+    }
+
+    pub fn lock_with_handshake<VM: VirtualMachine>(&self) -> MonitorGuard<T> {
+        let my_slot = get_thread_id().get();
+        let guard = if my_slot != self.holder.load(Ordering::Relaxed) {
+            let guard = self.lock_with_handshake_no_rec::<VM>();
+            self.holder.store(my_slot, Ordering::Release);
+            guard
+        } else {
+            MonitorGuard {
+                monitor: self,
+                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
+            }
+        };
+        self.rec_count.fetch_add(1, Ordering::Relaxed);
+        guard
+    }
+
+    fn lock_with_handshake_no_rec<VM: VirtualMachine>(&self) -> MonitorGuard<'_, T> {
+        let tls = Thread::<VM>::current();
+        tls.context.save_thread_state();
+
+        let mutex_guard = loop {
+            Thread::<VM>::enter_native();
+            let guard = self.mutex.lock();
+            if Thread::<VM>::attempt_leave_native_no_block() {
+                break guard;
+            } else {
+                drop(guard);
+                Thread::<VM>::leave_native();
+            }
+        };
+
+        MonitorGuard {
+            monitor: self,
+            guard: ManuallyDrop::new(mutex_guard),
+        }
+    }
+
+    pub fn notify(&self) {
+        self.cvar.notify_one();
+    }
+
+    pub fn notify_all(&self) {
+        self.cvar.notify_all();
+    }
+
+    pub unsafe fn relock_with_handshake<VM: VirtualMachine>(
+        &self,
+        rec_count: usize,
+    ) -> MonitorGuard<'_, T> {
+        let thread = Thread::<VM>::current();
+        thread.context.save_thread_state();
+        let guard = loop {
+            Thread::<VM>::enter_native();
+            let lock = self.mutex.lock();
+            if Thread::<VM>::attempt_leave_native_no_block() {
+                break lock;
+            } else {
+                drop(lock);
+                Thread::<VM>::leave_native();
+            }
+        };
+
+        self.holder.store(get_thread_id().get(), Ordering::Relaxed);
+        self.rec_count.store(rec_count, Ordering::Relaxed);
+        MonitorGuard {
+            monitor: self,
+            guard: ManuallyDrop::new(guard),
+        }
+    }
+}
+
+pub struct MonitorGuard<'a, T> {
+    monitor: &'a Monitor<T>,
+    guard: ManuallyDrop<MutexGuard<'a, T>>,
+}
+
+impl<T> Deref for MonitorGuard<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.guard
+    }
+}
+
+impl<'a, T> MonitorGuard<'a, T> {
+    pub fn wait_no_handshake(&mut self) {
+        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
+        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
+        self.monitor.cvar.wait(&mut self.guard);
+        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
+        self.monitor.holder.store(holder, Ordering::Relaxed);
+    }
+
+    pub fn wait_for_no_handshake(&mut self, timeout: Duration) -> WaitTimeoutResult {
+        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
+        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
+        let result = self.monitor.cvar.wait_for(&mut self.guard, timeout);
+        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
+        self.monitor.holder.store(holder, Ordering::Relaxed);
+        result
+    }
+
+    pub fn notify(&self) {
+        self.monitor.cvar.notify_one();
+    }
+
+    pub fn notify_all(&self) {
+        self.monitor.cvar.notify_all();
+    }
+
+    pub fn monitor(&self) -> &Monitor<T> {
+        self.monitor
+    }
+
+    pub unsafe fn unlock_completely(&mut self) -> usize {
+        let result = self.monitor.rec_count.load(Ordering::Relaxed);
+        self.monitor.rec_count.store(0, Ordering::Relaxed);
+        self.monitor.holder.store(0, Ordering::Relaxed);
+        unsafe {
+            ManuallyDrop::drop(&mut self.guard);
+        }
+        result
+    }
+
+    pub fn wait_with_handshake<VM: VirtualMachine>(mut self) -> Self {
+        let t = Thread::<VM>::current();
+        t.context.save_thread_state();
+        let rec_count = parked_scope::<usize, VM>(|| {
+            self.wait_no_handshake();
+            let rec_count = unsafe { self.unlock_completely() };
+            rec_count
+        });
+        unsafe { self.monitor.relock_with_handshake::<VM>(rec_count) }
+    }
+}
+
+impl<'a, T> Drop for MonitorGuard<'a, T> {
+    fn drop(&mut self) {
+        if self.monitor.rec_count.fetch_sub(1, Ordering::Relaxed) == 1 {
+            self.monitor.holder.store(0, Ordering::Relaxed);
+            unsafe { ManuallyDrop::drop(&mut self.guard) };
+        }
+    }
+}
+
diff --git a/vmkit/src/semaphore.rs b/vmkit/src/sync/semaphore.rs
similarity index 100%
rename from vmkit/src/semaphore.rs
rename to vmkit/src/sync/semaphore.rs
diff --git a/vmkit/src/threading.rs b/vmkit/src/threading.rs
index e6e063b..8b09458 100644
--- a/vmkit/src/threading.rs
+++ b/vmkit/src/threading.rs
@@ -18,14 +18,14 @@ use mmtk::{
 use parking_lot::Once;
 
 use crate::{
+    machine_context::PlatformRegisters,
     mm::{
         conservative_roots::ConservativeRoots,
         stack_bounds::{current_stack_pointer, StackBounds},
         tlab::TLAB,
         AllocFastPath, MemoryManager,
     },
-    semaphore::Semaphore,
-    sync::{Monitor, MonitorGuard},
+    sync::{semaphore::Semaphore, Monitor, MonitorGuard},
     VirtualMachine,
 };
 
@@ -195,6 +195,7 @@ pub struct Thread<VM: VirtualMachine> {
     should_block_for_gc: AtomicBool,
 
     stack_pointer: Atomic<usize>,
+    platform_registers: UnsafeCell<MaybeUninit<PlatformRegisters>>,
     suspend_count: AtomicUsize,
     /// The monitor of the thread. Protects access to the thread's state.
     monitor: Monitor<()>,
@@ -264,6 +265,7 @@ impl<VM: VirtualMachine> Thread<VM> {
         should_block_for_gc: AtomicBool::new(false),
         monitor: Monitor::new(()),
         communication_lock: Monitor::new(()),
+        platform_registers: UnsafeCell::new(MaybeUninit::zeroed()),
     })
 }
 
@@ -445,6 +447,7 @@
         self.add_about_to_terminate();
     }
 
+    /// Start a main thread.
     pub fn main<F, R>(context: VM::ThreadContext, f: F) -> Option<R>
     where
         F: FnOnce() -> R + Send + 'static,
@@ -739,6 +742,40 @@
         self.check_block_no_save_context();
     }
 
+    /// Save the thread's registers.
+    pub fn save_registers(&self) {
+        assert_eq!(
+            self.platform_handle(),
+            Self::current().platform_handle(),
+            "attempt to save registers of another thread"
+        );
+        let sp = current_stack_pointer();
+        self.stack_pointer.store(sp.as_usize(), Ordering::Relaxed);
+        unsafe {
+            cfgenius::cond! {
+                if macro(crate::macros::have_machine_context) {
+                    let mut ucontext = MaybeUninit::<libc::ucontext_t>::uninit();
+                    libc::getcontext(ucontext.as_mut_ptr());
+                    let registers = crate::machine_context::registers_from_ucontext(ucontext.as_ptr()).read();
+                    self.platform_registers.get().write(MaybeUninit::new(registers));
+                } else {
+                    self.platform_registers.get().write(MaybeUninit::new(crate::machine_context::PlatformRegisters {
+                        stack_pointer: sp.as_usize() as _
+                    }));
+                }
+            }
+        }
+    }
+
+    /// Get the thread's registers.
+    ///
+    /// NOTE: Does not guarantee valid registers
+    /// nor that the returned value is the currently active registers.
+    pub fn get_registers(&self) -> PlatformRegisters {
+        unsafe { self.platform_registers.get().read().assume_init() }
+    }
+
     /// Return this thread's stack pointer.
     ///
     /// Note: Does not guarantee that the returned value is the currently active stack pointer.
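A brief usage sketch of the relocated monitor (illustrative only, not part of this patch): a GC-friendly job queue following the handshake rules from the `sync/monitor.rs` doc comment above. `MyVM` stands in for whatever `VirtualMachine` implementation the embedder provides.

use std::cell::RefCell;
use vmkit::sync::Monitor;

// Monitor only hands out shared references to the protected data, so interior
// mutability comes from a RefCell, exactly as the doc comment recommends.
static QUEUE: Monitor<RefCell<Vec<usize>>> = Monitor::new(RefCell::new(Vec::new()));

fn push_job(job: usize) {
    // Short, never-parking critical section: the no-handshake lock is fine
    // here, but the GC cannot stop this thread while the lock is held.
    let guard = QUEUE.lock_no_handshake();
    guard.borrow_mut().push(job);
    guard.notify();
}

fn pop_job() -> usize {
    // Reports this thread as blocked while it waits for the mutex, so a
    // concurrent GC is not stalled by us.
    let mut guard = QUEUE.lock_with_handshake::<MyVM>();
    loop {
        if let Some(job) = guard.borrow_mut().pop() {
            return job;
        }
        // Unlocks, parks inside parked_scope, then re-acquires with a handshake.
        guard = guard.wait_with_handshake::<MyVM>();
    }
}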
@@ -928,9 +965,15 @@ impl<VM: VirtualMachine> Thread<VM> {
         A::is_blocked(self)
     }
 
+    /// Change the thread state to `InNative` and store the current stack pointer.
+    ///
+    /// This method marks the thread as blocked for the thread system, so GC can run
+    /// concurrently with this thread. Note that native code *must* not access the GC heap,
+    /// as that can lead to undefined behavior.
     pub fn enter_native() {
         let t = Self::current();
-        t.stack_pointer.store(current_stack_pointer().as_usize(), Ordering::Relaxed);
+        t.stack_pointer
+            .store(current_stack_pointer().as_usize(), Ordering::Relaxed);
         let mut old_state;
         loop {
             old_state = t.get_exec_status();
@@ -947,6 +990,10 @@
         }
     }
 
+    /// Attempt to leave native state and return to managed state without blocking.
+    ///
+    /// If this method returns `false`, call [`leave_native`](Self::leave_native) to block the thread, or
+    /// spin-wait until this method returns `true`.
     #[must_use = "If thread can't leave native state without blocking, call [leave_native](Thread::leave_native) instead"]
     pub fn attempt_leave_native_no_block() -> bool {
         let t = Self::current();
@@ -966,12 +1013,16 @@
         }
     }
 
+    /// Leave native state and return to managed state.
+    ///
+    /// This method might block the thread if any block requests are pending.
     pub fn leave_native() {
         if !Self::attempt_leave_native_no_block() {
             Self::current().leave_native_blocked();
         }
     }
 
+    /// Unblock the thread and notify all waiting threads.
     pub fn unblock<A: BlockAdapter<VM>>(&self) {
         let lock = self.monitor.lock_no_handshake();
         A::clear_block_request(self);
@@ -980,10 +1031,12 @@
         drop(lock);
     }
 
+    /// Are yieldpoints enabled for this thread?
     pub fn yieldpoints_enabled(&self) -> bool {
         self.yieldpoints_enabled_count.load(Ordering::Relaxed) == 1
     }
 
+    /// Enable yieldpoints for this thread.
     pub fn enable_yieldpoints(&self) {
         let val = self
             .yieldpoints_enabled_count
@@ -997,6 +1050,7 @@
         }
     }
 
+    /// Disable yieldpoints for this thread.
     pub fn disable_yieldpoints(&self) {
         self.yieldpoints_enabled_count
             .fetch_sub(1, Ordering::Relaxed);
@@ -1197,7 +1251,7 @@ impl<VM: VirtualMachine> Thread<VM> {
         let thread = Thread::<VM>::current();
         let _was_at_yieldpoint = thread.at_yieldpoint.load(atomic::Ordering::Relaxed);
         thread.at_yieldpoint.store(true, atomic::Ordering::Relaxed);
-        
+
         // If thread is in critical section we can't do anything right now, defer
         // until later
         // we do this without acquiring locks, since part of the point of disabling
@@ -1275,6 +1329,18 @@ pub(crate) fn deinit_current_thread<VM: VirtualMachine>() {
         })
 }
 
+/// Thread management system. This type is responsible
+/// for registering and managing all threads in the system. When GC
+/// requests a thread to block, it will go through this type.
+///
+/// Threads are suspended in two ways:
+/// 1) Yieldpoints are requested: this assumes a cooperative runtime that
+///    periodically checks whether [`take_yieldpoint`](Thread::take_yieldpoint) is non-zero
+///    and, if so, calls into [`Thread::yieldpoint`].
+/// 2) Signals: this is used when the runtime is uncooperative and runs without yieldpoints. We deliver
+///    UNIX signals or Windows exceptions to suspend the thread. This method is less precise and does not
+///    account for locks held by the mutators, so it is less safe. It is highly discouraged to use the
+///    [`sync`](crate::sync) module in uncooperative mode.
 pub struct ThreadManager<VM: VirtualMachine> {
     inner: Monitor<RefCell<ThreadManagerInner<VM>>>,
     soft_handshake_left: AtomicUsize,
@@ -1447,7 +1513,8 @@ impl<VM: VirtualMachine> ThreadManager<VM> {
         // Deal with terminating threads to ensure that all threads are either dead to MMTk or stopped above.
         self.process_about_to_terminate();
 
-        let threads = self.inner
+        let threads = self
+            .inner
             .lock_no_handshake()
             .borrow()
             .threads
@@ -1465,7 +1532,7 @@
             }
         }
 
-        threads
+        threads
     } else {
         self.process_about_to_terminate();
         let mut handshake_threads = Vec::with_capacity(4);
@@ -1844,6 +1911,7 @@ static TARGET_THREAD: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut());
 
 extern "C-unwind" fn signal_handler_suspend_resume<VM: VirtualMachine>(
     _signal: i32,
     _info: *const libc::siginfo_t,
+    _ucontext: *mut libc::c_void,
 ) {
     let target = TARGET_THREAD.load(Ordering::Relaxed).cast::<Thread<VM>>();
     let thread = unsafe { target.as_ref().unwrap() };
@@ -1878,6 +1946,22 @@ extern "C-unwind" fn signal_handler_suspend_resume(
     thread
         .stack_pointer
         .store(approximate_stack_pointer.as_usize(), Ordering::Release);
+
+    cfgenius::cond! {
+        if macro(crate::macros::have_machine_context) {
+            let user_context = _ucontext.cast::<libc::ucontext_t>();
+            unsafe {
+                thread.platform_registers.get().write(MaybeUninit::new(crate::machine_context::registers_from_ucontext(user_context).read()));
+            }
+        } else {
+            unsafe {
+                thread.platform_registers.get().write(MaybeUninit::new(crate::machine_context::PlatformRegisters {
+                    stack_pointer: approximate_stack_pointer.as_usize() as *mut u8
+                }))
+            }
+        }
+    }
+
     // Allow suspend caller to see that this thread is suspended.
     // sem_post is async-signal-safe function. It means that we can call this from a signal handler.
     // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03
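Finally, a sketch of the native-state protocol documented in the threading.rs hunks above, wrapped around a blocking syscall. Hypothetical code, assuming the same `MyVM: VirtualMachine` stand-in as before; not part of this patch.

use vmkit::threading::Thread;

fn blocking_read(fd: i32, buf: &mut [u8]) -> isize {
    // InNative: from here on a GC may run concurrently, so nothing below may
    // touch the GC heap.
    Thread::<MyVM>::enter_native();
    let n = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len()) };
    // Tries the non-blocking fast path first and only parks the thread if a
    // block request (e.g. a GC handshake) arrived while we were in native code.
    Thread::<MyVM>::leave_native();
    n
}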