diff --git a/Cargo.lock b/Cargo.lock index 671f204..98e459b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,6 +150,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfgenius" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ae118db0db0e2137671e9b916de8af5d3be65a5fb1ad3a9c7925c38b6644cfb" + [[package]] name = "clap" version = "4.5.28" @@ -829,6 +835,12 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1223,6 +1235,7 @@ dependencies = [ "atomic", "bytemuck", "cfg-if", + "cfgenius", "clap", "easy-bitfield", "env_logger", @@ -1231,6 +1244,7 @@ dependencies = [ "log", "mmtk", "parking_lot", + "paste", "rand", "sysinfo 0.33.1", "vmkit-proc", diff --git a/vmkit-proc/src/bytecode.rs b/vmkit-proc/src/bytecode.rs new file mode 100644 index 0000000..49e6999 --- /dev/null +++ b/vmkit-proc/src/bytecode.rs @@ -0,0 +1,6 @@ +//! Bytecode generation DSL +//! +//! This module provides a DSL for generating bytecode instructions. + +pub mod decl; +pub mod fits; \ No newline at end of file diff --git a/vmkit-proc/src/bytecode/decl.rs b/vmkit-proc/src/bytecode/decl.rs new file mode 100644 index 0000000..4090f4a --- /dev/null +++ b/vmkit-proc/src/bytecode/decl.rs @@ -0,0 +1,62 @@ +use proc_macro2::TokenStream; +use quote::quote; + +use super::fits; + + + +/// A single opcode argument +pub struct Argument { + pub name: syn::Ident, + pub index: usize, + pub optional: bool, + pub ty: syn::Type +} + +impl Argument { + pub fn new(name: syn::Ident, index: usize, optional: bool, ty: syn::Type) -> Self { + Self { + name, + index, + optional, + ty + } + } + + pub fn field(&self) -> TokenStream { + let name = &self.name; + let ty = &self.ty; + quote! { + #name: #ty + } + } + + pub fn create_param(&self) -> TokenStream { + let name = &self.name; + let ty = &self.ty; + if self.optional { + quote! { + #name: Option<#ty> + } + } else { + quote! { + #name: #ty + } + } + } + + pub fn field_name(&self) -> TokenStream { + let name = &self.name; + quote! { + #name + } + } + + pub fn fits_check(&self, size: usize) -> TokenStream { + fits::check(size, self.name.clone(), self.ty.clone()) + } + + pub fn fits_write(&self, size: usize) -> TokenStream { + fits::write(size, self.name.clone(), self.ty.clone()) + } +} \ No newline at end of file diff --git a/vmkit-proc/src/bytecode/fits.rs b/vmkit-proc/src/bytecode/fits.rs new file mode 100644 index 0000000..6020fcc --- /dev/null +++ b/vmkit-proc/src/bytecode/fits.rs @@ -0,0 +1,25 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::Ident; + +pub fn convert(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + quote! { + Fits::<#typ, #sz>::convert(#name) + } +} + +pub fn check(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + quote! { + Fits::<#typ, #sz>::check(#name) + } +} + +pub fn write(size: usize, name: syn::Ident, typ: syn::Type) -> TokenStream { + let sz = Ident::new(&format!("S{size}"), Span::call_site()); + let cvt = convert(size, name, typ); + quote! 
{
+        generator.write(#cvt)
+    }
+}
\ No newline at end of file
diff --git a/vmkit-proc/src/lib.rs b/vmkit-proc/src/lib.rs
index d96bb66..e031634 100644
--- a/vmkit-proc/src/lib.rs
+++ b/vmkit-proc/src/lib.rs
@@ -2,12 +2,9 @@
 extern crate proc_macro;
 
-use std::collections::HashSet;
-
-use proc_macro::TokenStream;
-use proc_macro2::{extra, Span};
+use proc_macro2::Span;
 use quote::quote;
-use syn::{parse_macro_input, Arm, Data, DeriveInput};
+use std::collections::HashSet;
 use synstructure::{decl_derive, AddBounds};
 
 struct ArrayLike {
@@ -15,6 +12,7 @@ struct ArrayLike {
     /// Getter for length field.
     pub length_getter: Option<syn::Ident>,
     /// Setter for length field.
+    #[allow(dead_code)]
     pub length_setter: Option<syn::Ident>,
     pub element_type: syn::Type,
     pub data_field: Option<syn::Ident>,
@@ -155,6 +153,7 @@ fn find_arraylike(attrs: &[syn::Attribute]) -> syn::Result<Option<ArrayLike>> {
 enum ObjectAlignment {
     Auto,
     Const(syn::Expr),
+    #[allow(dead_code)]
     Compute(syn::Expr),
 }
 
@@ -331,7 +330,7 @@ pub fn derive_gcmetadata(input: TokenStream) -> TokenStream {
 
 decl_derive!([GCMetadata,attributes(gcmetadata, arraylike, ignore_trace)] => derive_gcmetadata);
 
-fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenStream {
+fn derive_gcmetadata(s: synstructure::Structure<'_>) -> proc_macro2::TokenStream {
     let gc_metadata = match find_gcmetadata(&s.ast().attrs) {
         Ok(gc) => gc,
         Err(err) => return err.to_compile_error(),
@@ -372,7 +371,6 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
 
     let trace_impl = if scan_slots {
         let trace_body = filtered.each(|bi| {
-
             quote! {
                 mark(#bi, visitor);
             }
@@ -484,40 +482,48 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
             )
         };
 
-        (Some(trace_impl), if scan_slots {
-            let full_path = quote! { <#vm as vmkit::VirtualMachine>::Slot };
-            quote! { ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
-                let object = object.as_address().as_ref::<#name>();
-                ::vmkit::prelude::Scan::<#full_path>::scan_object(object, visitor);
-            }) }
-        } else {
-            quote! { ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
-                let object = object.as_address().as_mut_ref::<#name>();
-                ::vmkit::prelude::Trace::trace_object(object, visitor);
-            }) }
-        })
+        (
+            Some(trace_impl),
+            if scan_slots {
+                let full_path = quote! { <#vm as vmkit::VirtualMachine>::Slot };
+                quote! { ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
+                    let object = object.as_address().as_ref::<#name>();
+                    ::vmkit::prelude::Scan::<#full_path>::scan_object(object, visitor);
+                }) }
+            } else {
+                quote! { ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
+                    let object = object.as_address().as_mut_ref::<#name>();
+                    ::vmkit::prelude::Trace::trace_object(object, visitor);
+                }) }
+            },
+        )
     }
     ObjectTrace::Trace(scan_slots, expr) => {
        if scan_slots {
-            (None, quote! {
-                ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
-                    let this = object.as_address().as_ref::<#name>();
-                    #expr
-                })
-            })
+            (
+                None,
+                quote! {
+                    ::vmkit::prelude::TraceCallback::ScanSlots(|object, visitor| unsafe {
+                        let this = object.as_address().as_ref::<#name>();
+                        #expr
+                    })
+                },
+            )
        } else {
-            (None, quote! {
-                ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
-                    let this = object.as_address().as_mut_ref::<#name>();
-                    #expr
-                })
-            })
+            (
+                None,
+                quote! {
+                    ::vmkit::prelude::TraceCallback::TraceObject(|object, visitor| unsafe {
+                        let this = object.as_address().as_mut_ref::<#name>();
+                        #expr
+                    })
+                },
+            )
        }
     }
-    ,
     };
-
+
     let instance_size = match &gc_metadata.size {
         ObjectSize::Auto if arraylike.is_some() => quote! { 0 },
         ObjectSize::Size(_) if arraylike.is_some() => quote! { 0 },
@@ -547,12 +553,11 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
             panic!("length field not found");
         };
 
-        let element = &arraylike.element_type;
         quote! {
             Some(|object| unsafe {
                 let this = object.as_address().as_ref::<#name>();
                 let length = #length;
-
+
                 size_of::<#name>() + (length * ::std::mem::size_of::<#element>())
             })
         }
@@ -589,3 +594,5 @@ fn derive_gcmetadata(mut s: synstructure::Structure<'_>) -> proc_macro2::TokenSt
 
     output.into()
 }
+
+mod bytecode;
\ No newline at end of file
diff --git a/vmkit/Cargo.toml b/vmkit/Cargo.toml
index ee201d8..baf917a 100644
--- a/vmkit/Cargo.toml
+++ b/vmkit/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 atomic = "0.6.0"
 bytemuck = "1.21.0"
 cfg-if = "1.0.0"
+cfgenius = "0.1.1"
 clap = { version = "4.5.28", features = ["derive"] }
 easy-bitfield = "0.1.0"
 env_logger = "0.11.6"
@@ -15,6 +16,7 @@ libc = "0.2.169"
 log = { version = "0.4.25" }
 mmtk = { git = "https://github.com/mmtk/mmtk-core" }
 parking_lot = "0.12.3"
+paste = "1.0.15"
 rand = "0.9.0"
 sysinfo = "0.33.1"
 vmkit-proc = { path = "../vmkit-proc", optional = true }
diff --git a/vmkit/examples/binarytrees-mt.c b/vmkit/examples/binarytrees-mt.c
new file mode 100644
index 0000000..72fe58b
--- /dev/null
+++ b/vmkit/examples/binarytrees-mt.c
@@ -0,0 +1,104 @@
+#define GC_THREADS 1
+#define GC_NO_THREAD_REDIRECTS 1
+#include <gc.h>
+
+#include <stdio.h>
+#include <pthread.h>
+#include <time.h>
+
+
+typedef struct Node {
+    struct Node *left;
+    struct Node *right;
+} Node;
+
+Node* leaf() {
+    return GC_malloc(sizeof(Node));
+}
+
+Node* new_node(Node* left, Node* right) {
+    Node* node = GC_malloc(sizeof(Node));
+    node->left = left;
+    node->right = right;
+    return node;
+}
+
+int itemCheck(Node* node) {
+    if (node->left == NULL) {
+        return 1;
+    }
+    return 1 + itemCheck(node->left) + itemCheck(node->right);
+}
+
+Node* bottomUpTree(int depth) {
+    if (depth > 0) {
+        return new_node(bottomUpTree(depth - 1), bottomUpTree(depth - 1));
+    }
+    return leaf();
+}
+
+extern void* __data_start;
+extern void* _end;
+
+Node* longLivedTree;
+char results[16][256];
+void* threadWork(void* data) {
+    struct GC_stack_base base;
+    GC_get_stack_base(&base);
+    GC_register_my_thread(&base);
+    int d = (int)(size_t)data;
+    int iterations = 1 << (21 - d + 4);
+    int check = 0;
+    for (int i = 0; i < iterations; i++) {
+        Node* treeNode = bottomUpTree(d);
+        check += itemCheck(treeNode);
+    }
+    //results[(d-4)/2] =
+
+    sprintf(&results[(d-4)/2][0], "%d\t trees of depth %d\t check: %d\n", iterations, d, check);
+    GC_unregister_my_thread();
+    return NULL;
+}
+
+int main() {
+    printf("DATA START: %p\n", &__data_start);
+    printf("DATA END: %p\n", &_end);
+    GC_use_entire_heap = 1;
+    GC_allow_register_threads();
+    GC_init();
+
+
+    int maxDepth = 21;
+    int stretchDepth = maxDepth + 1;
+    int start = clock();
+    Node* stretchTree = bottomUpTree(stretchDepth);
+    printf("stretch tree of depth %d\n", stretchDepth);
+    printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC);
+
+    longLivedTree = bottomUpTree(maxDepth);
+    GC_gcollect();
+
+    pthread_t threads[16];
+
+    printf("long lived tree of depth %d\t check: %d\n", maxDepth, itemCheck(longLivedTree));
+    for (int d = 4; d <= maxDepth; d += 2) {
+        /*int iterations = 1 << (maxDepth - d + 4);
+        int check = 0;
+        for (int i = 0; i < iterations; i++) {
+            Node* treeNode = bottomUpTree(d);
+            check += itemCheck(treeNode);
+        }
+        printf("%d\t trees of depth %d\t check: %d\n", iterations, d, check);*/
+        void* data = (void*)(size_t)d;
+        pthread_create(&threads[(d-4)/2], NULL, threadWork, data);
+    }
+
+    for (int d = 4; d <= maxDepth; d += 2) {
+        pthread_join(threads[(d-4)/2], NULL);
+        printf("%s", results[(d-4)/2]);
+    }
+
+    printf("long lived tree of depth %d\t check: %d\n", maxDepth, itemCheck(longLivedTree));
+    printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC);
+
+    return 0;
+}
diff --git a/vmkit/examples/binarytrees.c b/vmkit/examples/binarytrees.c
index bfb30a5..914b348 100644
--- a/vmkit/examples/binarytrees.c
+++ b/vmkit/examples/binarytrees.c
@@ -46,7 +46,7 @@ int main() {
     GC_init();
 
 
-    int maxDepth = 18;
+    int maxDepth = 21;
     int stretchDepth = maxDepth + 1;
     int start = clock();
     Node* stretchTree = bottomUpTree(stretchDepth);
diff --git a/vmkit/src/bdwgc_shim.rs b/vmkit/src/bdwgc_shim.rs
index 7bd4c97..4e30547 100644
--- a/vmkit/src/bdwgc_shim.rs
+++ b/vmkit/src/bdwgc_shim.rs
@@ -253,6 +253,10 @@ impl VirtualMachine for BDWGC {
             }
         }
     }
+
+    fn compute_hashcode(object: VMKitObject) -> usize {
+        object.as_address().as_usize()
+    }
 }
 
 type VTableAddress = BitField;
@@ -605,6 +609,11 @@ pub extern "C-unwind" fn GC_unregister_my_thread() {
     unsafe { Thread::<BDWGC>::unregister_mutator_manual() };
 }
 
+#[no_mangle]
+pub extern "C-unwind" fn GC_allow_register_threads() {
+    /* noop: always allowed */
+}
+
 #[no_mangle]
 pub extern "C-unwind" fn GC_pthread_create(
     thread_ptr: &mut libc::pthread_t,
diff --git a/vmkit/src/lib.rs b/vmkit/src/lib.rs
index 07a9756..dbca05c 100644
--- a/vmkit/src/lib.rs
+++ b/vmkit/src/lib.rs
@@ -13,10 +13,9 @@ pub mod mm;
 pub mod object_model;
 pub mod options;
 pub mod platform;
-pub mod semaphore;
 pub mod sync;
 pub mod threading;
-
+pub mod macros;
 #[cfg(feature = "uncooperative")]
 pub mod bdwgc_shim;
@@ -219,8 +218,8 @@ impl VMKit<VM> {
 #[cfg(feature="derive")]
 pub use vmkit_proc::GCMetadata;
 
-
 pub mod prelude {
+    #[cfg(feature="derive")]
     pub use super::GCMetadata;
     pub use super::mm::traits::*;
     pub use super::object_model::object::*;
diff --git a/vmkit/src/machine_context.rs b/vmkit/src/machine_context.rs
index 139597f..e15d53b 100644
--- a/vmkit/src/machine_context.rs
+++ b/vmkit/src/machine_context.rs
@@ -1,2 +1,62 @@
+cfgenius::cond! {
+
+    if macro(crate::macros::darwin) {
+        cfgenius::cond! {
+            if cfg(target_arch="x86_64") {
+                pub type PlatformRegisters = libc::__darwin_x86_thread_state64;
+            } else if cfg(target_arch="aarch64") {
+                pub type PlatformRegisters = libc::__darwin_arm_thread_state64;
+            } else {
+                compile_error!("Unsupported Apple target");
+            }
+        }
+
+        pub unsafe fn registers_from_ucontext(ucontext: *const libc::ucontext_t) -> *const PlatformRegisters {
+            return &(*ucontext).uc_mcontext.__ss;
+        }
+    } else if macro(crate::macros::have_machine_context) {
+
+        #[cfg(not(target_os="openbsd"))]
+        use libc::mcontext_t;
+        #[cfg(target_os="openbsd")]
+        use libc::ucontext_t as mcontext_t;
+
+        #[repr(C)]
+        #[derive(Clone)]
+        pub struct PlatformRegisters {
+            pub machine_context: mcontext_t
+        }
+
+        pub unsafe fn registers_from_ucontext(ucontext: *const libc::ucontext_t) -> *const PlatformRegisters {
+            cfgenius::cond! {
+                if cfg(target_os="openbsd")
+                {
+                    return ucontext.cast();
+                }
+                else if cfg(target_arch="powerpc")
+                {
+                    return unsafe { std::mem::transmute(
+                        &(*ucontext).uc_mcontext.uc_regs
+                    ) }
+                } else {
+                    return unsafe { std::mem::transmute(
+                        &(*ucontext).uc_mcontext
+                    ) }
+                }
+
+            }
+        }
+
+
+    } else if cfg(windows) {
+        use winapi::um::winnt::CONTEXT;
+
+        pub type PlatformRegisters = CONTEXT;
+    } else {
+        pub struct PlatformRegisters {
+            pub stack_pointer: *mut u8
+        }
+    }
+}
diff --git a/vmkit/src/macros.rs b/vmkit/src/macros.rs
new file mode 100644
index 0000000..634491e
--- /dev/null
+++ b/vmkit/src/macros.rs
@@ -0,0 +1,27 @@
+cfgenius::define! {
+    pub darwin = cfg(target_vendor="apple");
+    pub ios_family = all(macro(darwin), cfg(target_os="ios"));
+    pub have_machine_context = any(
+        macro(darwin),
+        cfg(target_os="fuchsia"),
+        all(
+            cfg(any(
+                target_os="freebsd",
+                target_os="haiku",
+                target_os="netbsd",
+                target_os="openbsd",
+                target_os="linux",
+                target_os="hurd"
+            )),
+            cfg(
+                any(
+                    target_arch="x86_64",
+                    target_arch="arm",
+                    target_arch="aarch64",
+                    target_arch="riscv64",
+                )
+            )
+        ));
+
+}
diff --git a/vmkit/src/main.rs b/vmkit/src/main.rs
index b86d3d8..ef6b1d0 100644
--- a/vmkit/src/main.rs
+++ b/vmkit/src/main.rs
@@ -1,7 +1,6 @@
 use mmtk::util::Address;
 use mmtk::vm::slot::UnimplementedMemorySlice;
 use mmtk::{util::options::PlanSelector, vm::slot::SimpleSlot, AllocationSemantics, MMTKBuilder};
-use vmkit::GCMetadata;
 use std::cell::RefCell;
 use std::mem::offset_of;
 use std::sync::Arc;
@@ -182,16 +181,7 @@ fn bottom_up_tree(thread: &Thread<BenchVM>, depth: usize) -> NodeRef {
 
 const MIN_DEPTH: usize = 4;
 
-#[derive(GCMetadata)]
-#[gcmetadata(
-    vm = BenchVM,
-    scan
-)]
-enum Object {
-
-    Fixnum(#[ignore_trace] i32),
-    Boxed(VMKitObject)
-}
+
 
@@ -210,7 +200,7 @@ fn main() {
         .unwrap_or_else(|_| panic!());
 
     Thread::<BenchVM>::main(ThreadBenchContext, || {
-        let thread = Thread::<BenchVM>::current();
+        /*let thread = Thread::<BenchVM>::current();
         let start = std::time::Instant::now();
         let n = std::env::var("DEPTH")
             .unwrap_or("18".to_string())
@@ -271,6 +261,17 @@ fn main() {
         );
 
         let duration = start.elapsed();
-        println!("time: {duration:?}");
+        println!("time: {duration:?}");*/
+
+
+        let thread = Thread::<BenchVM>::current();
+
+        thread.save_registers();
+
+        let registers = thread.get_registers();
+
+        for (i, greg) in registers.machine_context.gregs.iter().enumerate() {
+            println!("{:02}: {:x}", i, greg);
+        }
     });
 }
diff --git a/vmkit/src/mm.rs b/vmkit/src/mm.rs
index 67c8faf..177fbf8 100644
--- a/vmkit/src/mm.rs
+++ b/vmkit/src/mm.rs
@@ -146,6 +146,7 @@ impl<VM: VirtualMachine> MemoryManager<VM> {
         object_start.store(HeapObjectHeader::<VM>::new(metadata));
         let object = VMKitObject::from_address(object_start + OBJECT_REF_OFFSET);
         Self::set_vo_bit(object);
+        debug_assert!(mmtk::memory_manager::is_mapped_address(object.as_address()));
         return object;
     }
@@ -272,6 +273,7 @@ impl<VM: VirtualMachine> MemoryManager<VM> {
         let object = VMKitObject::from_address(object_start + OBJECT_REF_OFFSET);
         Self::set_vo_bit(object);
         Self::refill_tlab(thread);
+        debug_assert!(mmtk::memory_manager::is_mapped_address(object.as_address()));
         object
     }
 }
diff --git a/vmkit/src/mm/scanning.rs b/vmkit/src/mm/scanning.rs
index c5739ef..3feb4e3 100644
--- a/vmkit/src/mm/scanning.rs
+++ b/vmkit/src/mm/scanning.rs
@@ -1,17 +1,13 @@
 use std::marker::PhantomData;
 
 use crate::{
-    mm::MemoryManager,
-    object_model::{
+    machine_context::PlatformRegisters, mm::MemoryManager, object_model::{
         metadata::{Metadata, TraceCallback},
         object::VMKitObject,
-    },
-    threading::{Thread, ThreadContext},
-    VirtualMachine,
+    }, threading::{Thread, ThreadContext}, VirtualMachine
 };
 
 use mmtk::{
-    vm::{slot::Slot, ObjectTracer, Scanning, SlotVisitor},
-    MutatorContext,
+    util::Address, vm::{slot::Slot, ObjectTracer, Scanning, SlotVisitor}, MutatorContext
 };
 
 use super::traits::ToSlot;
@@ -106,9 +102,17 @@ impl<VM: VirtualMachine> Scanning<MemoryManager<VM>> for VMKitScanning<VM> {
         use super::conservative_roots::ConservativeRoots;
         let mut croots = ConservativeRoots::new(128);
         let bounds = *tls.stack_bounds();
+        let registers = tls.get_registers();
+
+        unsafe {
+            let start = Address::from_ref(&registers);
+            let end = start.add(size_of::<PlatformRegisters>());
+            croots.add_span(start, end);
+        }
         unsafe { croots.add_span(bounds.origin(), tls.stack_pointer()) };
         tls.context.scan_conservative_roots(&mut croots);
         croots.add_to_factory(&mut factory);
+        drop(registers);
     }
 }
diff --git a/vmkit/src/object_model/compression.rs b/vmkit/src/object_model/compression.rs
index 878230b..104455d 100644
--- a/vmkit/src/object_model/compression.rs
+++ b/vmkit/src/object_model/compression.rs
@@ -47,6 +47,10 @@ static COMPRESSED_OPS: CompressedOpsStorage =
         range: (Address::ZERO, Address::ZERO),
     }));
 
+
+pub const MY_RANDOM_ADDR: usize = 0x500_000_000 + 6 * 1024 * 1024 * 1024;
+pub const COMPRESSED: usize = (MY_RANDOM_ADDR - 0x500_000_000) >> 3;
+
 impl CompressedOps {
     /// Initialize compressed object pointers.
     ///
diff --git a/vmkit/src/object_model/header.rs b/vmkit/src/object_model/header.rs
index 37cf2d1..d69038a 100644
--- a/vmkit/src/object_model/header.rs
+++ b/vmkit/src/object_model/header.rs
@@ -11,7 +11,7 @@ pub const OBJECT_REF_OFFSET: isize = 8;
 pub const OBJECT_HEADER_OFFSET: isize = -OBJECT_REF_OFFSET;
 pub const HASHCODE_OFFSET: isize = -(OBJECT_REF_OFFSET + size_of::() as isize);
 
-pub const METADATA_BIT_LIMIT: usize = if ADDRESS_BASED_HASHING { usize::BITS as usize - 2 } else { usize::BITS as usize };
+pub const METADATA_BIT_LIMIT: usize = if ADDRESS_BASED_HASHING { usize::BITS as usize - 2 } else { usize::BITS as usize - 1 };
 
 pub type MetadataField = BitField;
 pub type HashStateField = BitField;
diff --git a/vmkit/src/sync.rs b/vmkit/src/sync.rs
index e10bf17..b1242bb 100644
--- a/vmkit/src/sync.rs
+++ b/vmkit/src/sync.rs
@@ -1,233 +1,17 @@
-use std::{
-    mem::ManuallyDrop,
-    num::NonZeroU64,
-    ops::Deref,
-    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
-    time::Duration,
-};
+//! Synchronization primitives for VMKit.
+//!
+//! Provides synchronization primitives which are friendly to our thread system. Most of the types
+//! provide `*_with_handshake` and `*_no_handshake` methods. The difference is that the former
+//! notify the scheduler that the thread is blocked, while the latter do not. Most of the time
+//! your code should use the `*_with_handshake` methods, so that GC or other tasks can be
+//! scheduled while the thread is blocked. The `*_no_handshake` methods should be used when the
+//! thread is accessing GC data structures, or when the thread is not allowed to be blocked.
+pub mod semaphore;
+pub mod monitor;
 
-use parking_lot::{Condvar, Mutex, MutexGuard, WaitTimeoutResult};
+pub use monitor::*;
+pub use super::threading::parked_scope;
\ No newline at end of file
 
-use crate::{
-    threading::{parked_scope, Thread, ThreadContext},
-    VirtualMachine,
-};
-
-fn get_thread_id() -> NonZeroU64 {
-    thread_local! {
-        static KEY: u64 = 0;
-    }
-    KEY.with(|x| {
-        NonZeroU64::new(x as *const _ as u64).expect("thread-local variable address is null")
-    })
-}
-
-/// Implementation of a heavy lock and condition variable implemented using
-/// the primitives available from the `parking_lot`. Currently we use
-/// a `Mutex` and `Condvar`.
-///
-/// It is perfectly safe to use this throughout the VM for locking. It is
-/// meant to provide roughly the same functionality as ReentrantMutex combined with Condvar,
-/// except:
-///
-/// - This struct provides a faster slow path than ReentrantMutex.
-/// - This struct provides a slower fast path than ReentrantMutex.
-/// - This struct will work in the inner guts of the VM runtime because
-///   it gives you the ability to lock and unlock, as well as wait and
-///   notify, without using any other VM runtime functionality.
-/// - This struct allows you to optionally block without letting the thread
-///   system know that you are blocked. The benefit is that you can
-///   perform synchronization without depending on VM thread subsystem functionality.
-///   However, most of the time, you should use the methods that inform
-///   the thread system that you are blocking. Methods that have the
-///   `with_handshake` suffix will inform the thread system if you are blocked,
-///   while methods that do not have the suffix will either not block
-///   (as is the case with `unlock()` and `broadcast()`)
-///   or will block without letting anyone know (like `lock_no_handshake()`
-///   and `wait_no_handshake()`). Not letting the threading
-///   system know that you are blocked may cause things like GC to stall
-///   until you unblock.
-/// - This struct does not provide mutable access to the protected data, as that is
-///   unsound; instead use `RefCell` to mutate the protected data.
-pub struct Monitor<T> {
-    mutex: Mutex<T>,
-    cvar: Condvar,
-    rec_count: AtomicUsize,
-    holder: AtomicU64,
-}
-
-impl<T> Monitor<T> {
-    pub const fn new(value: T) -> Self {
-        Self {
-            mutex: Mutex::new(value),
-            cvar: Condvar::new(),
-            rec_count: AtomicUsize::new(0),
-            holder: AtomicU64::new(0),
-        }
-    }
-
-    pub fn lock_no_handshake(&self) -> MonitorGuard<T> {
-        let my_slot = get_thread_id().get();
-        let guard = if self.holder.load(Ordering::Relaxed) != my_slot {
-            let guard = self.mutex.lock();
-            self.holder.store(my_slot, Ordering::Release);
-            MonitorGuard {
-                monitor: self,
-                guard: ManuallyDrop::new(guard),
-            }
-        } else {
-            MonitorGuard {
-                monitor: self,
-                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
-            }
-        };
-        self.rec_count.fetch_add(1, Ordering::Relaxed);
-        guard
-    }
-
-    pub fn lock_with_handshake<VM: VirtualMachine>(&self) -> MonitorGuard<T> {
-        let my_slot = get_thread_id().get();
-        let guard = if my_slot != self.holder.load(Ordering::Relaxed) {
-            let guard = self.lock_with_handshake_no_rec::<VM>();
-            self.holder.store(my_slot, Ordering::Release);
-            guard
-        } else {
-            MonitorGuard {
-                monitor: self,
-                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
-            }
-        };
-        self.rec_count.fetch_add(1, Ordering::Relaxed);
-        guard
-    }
-
-    fn lock_with_handshake_no_rec<VM: VirtualMachine>(&self) -> MonitorGuard<'_, T> {
-        let tls = Thread::<VM>::current();
-        tls.context.save_thread_state();
-
-        let mutex_guard = loop {
-            Thread::<VM>::enter_native();
-            let guard = self.mutex.lock();
-            if Thread::<VM>::attempt_leave_native_no_block() {
-                break guard;
-            } else {
-                drop(guard);
-                Thread::<VM>::leave_native();
-            }
-        };
-
-        MonitorGuard {
-            monitor: self,
-            guard: ManuallyDrop::new(mutex_guard),
-        }
-    }
-
-    pub fn notify(&self) {
-        self.cvar.notify_one();
-    }
-
-    pub fn notify_all(&self) {
-        self.cvar.notify_all();
-    }
-
-    pub unsafe fn relock_with_handshake<VM: VirtualMachine>(
-        &self,
-        rec_count: usize,
-    ) -> MonitorGuard<'_, T> {
-        let thread = Thread::<VM>::current();
-        thread.context.save_thread_state();
-        let guard = loop {
-            Thread::<VM>::enter_native();
-            let lock = self.mutex.lock();
-            if Thread::<VM>::attempt_leave_native_no_block() {
-                break lock;
-            } else {
-                drop(lock);
-                Thread::<VM>::leave_native();
-            }
-        };
-
-        self.holder.store(get_thread_id().get(), Ordering::Relaxed);
-        self.rec_count.store(rec_count, Ordering::Relaxed);
-        MonitorGuard {
-            monitor: self,
-            guard: ManuallyDrop::new(guard),
-        }
-    }
-}
-
-pub struct MonitorGuard<'a, T> {
-    monitor: &'a Monitor<T>,
-    guard: ManuallyDrop<MutexGuard<'a, T>>,
-}
-
-impl<T> Deref for MonitorGuard<'_, T> {
-    type Target = T;
-
-    fn deref(&self) -> &Self::Target {
-        &self.guard
-    }
-}
-
-impl<'a, T> MonitorGuard<'a, T> {
-    pub fn wait_no_handshake(&mut self) {
-        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
-        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
-        self.monitor.cvar.wait(&mut self.guard);
-        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
-        self.monitor.holder.store(holder, Ordering::Relaxed);
-    }
-
-    pub fn wait_for_no_handshake(&mut self, timeout: Duration) -> WaitTimeoutResult {
-        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
-        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
-        let result = self.monitor.cvar.wait_for(&mut self.guard, timeout);
-        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
-        self.monitor.holder.store(holder, Ordering::Relaxed);
-        result
-    }
-
-    pub fn notify(&self) {
-        self.monitor.cvar.notify_one();
-    }
-
-    pub fn notify_all(&self) {
-        self.monitor.cvar.notify_all();
-    }
-
-    pub fn monitor(&self) -> &Monitor<T> {
-        self.monitor
-    }
-
-    pub unsafe fn unlock_completely(&mut self) -> usize {
-        let result = self.monitor.rec_count.load(Ordering::Relaxed);
-        self.monitor.rec_count.store(0, Ordering::Relaxed);
-        self.monitor.holder.store(0, Ordering::Relaxed);
-        unsafe {
-            ManuallyDrop::drop(&mut self.guard);
-        }
-        result
-    }
-
-    pub fn wait_with_handshake<VM: VirtualMachine>(mut self) -> Self {
-        let t = Thread::<VM>::current();
-        t.context.save_thread_state();
-        let rec_count = parked_scope::<usize, VM>(|| {
-            self.wait_no_handshake();
-            let rec_count = unsafe { self.unlock_completely() };
-            rec_count
-        });
-        unsafe { self.monitor.relock_with_handshake::<VM>(rec_count) }
-    }
-}
-
-impl<'a, T> Drop for MonitorGuard<'a, T> {
-    fn drop(&mut self) {
-        if self.monitor.rec_count.fetch_sub(1, Ordering::Relaxed) == 1 {
-            self.monitor.holder.store(0, Ordering::Relaxed);
-            unsafe { ManuallyDrop::drop(&mut self.guard) };
-        }
-    }
-}
diff --git a/vmkit/src/sync/monitor.rs b/vmkit/src/sync/monitor.rs
new file mode 100644
index 0000000..75ddbfa
--- /dev/null
+++ b/vmkit/src/sync/monitor.rs
@@ -0,0 +1,234 @@
+
+use std::{
+    mem::ManuallyDrop,
+    num::NonZeroU64,
+    ops::Deref,
+    sync::atomic::{AtomicU64, AtomicUsize, Ordering},
+    time::Duration,
+};
+
+use parking_lot::{Condvar, Mutex, MutexGuard, WaitTimeoutResult};
+
+use crate::{
+    threading::{parked_scope, Thread, ThreadContext},
+    VirtualMachine,
+};
+
+fn get_thread_id() -> NonZeroU64 {
+    thread_local! {
+        static KEY: u64 = 0;
+    }
+    KEY.with(|x| {
+        NonZeroU64::new(x as *const _ as u64).expect("thread-local variable address is null")
+    })
+}
+
+/// Implementation of a heavy lock and condition variable implemented using
+/// the primitives available from the `parking_lot`. Currently we use
+/// a `Mutex` and `Condvar`.
+///
+/// It is perfectly safe to use this throughout the VM for locking. It is
+/// meant to provide roughly the same functionality as ReentrantMutex combined with Condvar,
+/// except:
+///
+/// - This struct provides a faster slow path than ReentrantMutex.
+/// - This struct provides a slower fast path than ReentrantMutex.
+/// - This struct will work in the inner guts of the VM runtime because
+///   it gives you the ability to lock and unlock, as well as wait and
+///   notify, without using any other VM runtime functionality.
+/// - This struct allows you to optionally block without letting the thread
+///   system know that you are blocked. The benefit is that you can
+///   perform synchronization without depending on VM thread subsystem functionality.
+///   However, most of the time, you should use the methods that inform
+///   the thread system that you are blocking. Methods that have the
+///   `with_handshake` suffix will inform the thread system if you are blocked,
+///   while methods that do not have the suffix will either not block
+///   (as is the case with `unlock()` and `broadcast()`)
+///   or will block without letting anyone know (like `lock_no_handshake()`
+///   and `wait_no_handshake()`). Not letting the threading
+///   system know that you are blocked may cause things like GC to stall
+///   until you unblock.
+/// - This struct does not provide mutable access to the protected data, as that is
+///   unsound; instead use `RefCell` to mutate the protected data.
+pub struct Monitor<T> {
+    mutex: Mutex<T>,
+    cvar: Condvar,
+    rec_count: AtomicUsize,
+    holder: AtomicU64,
+}
+
+impl<T> Monitor<T> {
+    pub const fn new(value: T) -> Self {
+        Self {
+            mutex: Mutex::new(value),
+            cvar: Condvar::new(),
+            rec_count: AtomicUsize::new(0),
+            holder: AtomicU64::new(0),
+        }
+    }
+
+    pub fn lock_no_handshake(&self) -> MonitorGuard<T> {
+        let my_slot = get_thread_id().get();
+        let guard = if self.holder.load(Ordering::Relaxed) != my_slot {
+            let guard = self.mutex.lock();
+            self.holder.store(my_slot, Ordering::Release);
+            MonitorGuard {
+                monitor: self,
+                guard: ManuallyDrop::new(guard),
+            }
+        } else {
+            MonitorGuard {
+                monitor: self,
+                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
+            }
+        };
+        self.rec_count.fetch_add(1, Ordering::Relaxed);
+        guard
+    }
+
+    pub fn lock_with_handshake<VM: VirtualMachine>(&self) -> MonitorGuard<T> {
+        let my_slot = get_thread_id().get();
+        let guard = if my_slot != self.holder.load(Ordering::Relaxed) {
+            let guard = self.lock_with_handshake_no_rec::<VM>();
+            self.holder.store(my_slot, Ordering::Release);
+            guard
+        } else {
+            MonitorGuard {
+                monitor: self,
+                guard: unsafe { ManuallyDrop::new(self.mutex.make_guard_unchecked()) },
+            }
+        };
+        self.rec_count.fetch_add(1, Ordering::Relaxed);
+        guard
+    }
+
+    fn lock_with_handshake_no_rec<VM: VirtualMachine>(&self) -> MonitorGuard<'_, T> {
+        let tls = Thread::<VM>::current();
+        tls.context.save_thread_state();
+
+        let mutex_guard = loop {
+            Thread::<VM>::enter_native();
+            let guard = self.mutex.lock();
+            if Thread::<VM>::attempt_leave_native_no_block() {
+                break guard;
+            } else {
+                drop(guard);
+                Thread::<VM>::leave_native();
+            }
+        };
+
+        MonitorGuard {
+            monitor: self,
+            guard: ManuallyDrop::new(mutex_guard),
+        }
+    }
+
+    pub fn notify(&self) {
+        self.cvar.notify_one();
+    }
+
+    pub fn notify_all(&self) {
+        self.cvar.notify_all();
+    }
+
+    pub unsafe fn relock_with_handshake<VM: VirtualMachine>(
+        &self,
+        rec_count: usize,
+    ) -> MonitorGuard<'_, T> {
+        let thread = Thread::<VM>::current();
+        thread.context.save_thread_state();
+        let guard = loop {
+            Thread::<VM>::enter_native();
+            let lock = self.mutex.lock();
+            if Thread::<VM>::attempt_leave_native_no_block() {
+                break lock;
+            } else {
+                drop(lock);
+                Thread::<VM>::leave_native();
+            }
+        };
+
+        self.holder.store(get_thread_id().get(), Ordering::Relaxed);
+        self.rec_count.store(rec_count, Ordering::Relaxed);
+        MonitorGuard {
+            monitor: self,
+            guard: ManuallyDrop::new(guard),
+        }
+    }
+}
+
+pub struct MonitorGuard<'a, T> {
+    monitor: &'a Monitor<T>,
+    guard: ManuallyDrop<MutexGuard<'a, T>>,
+}
+
+impl<T> Deref for MonitorGuard<'_, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.guard
+    }
+}
+
+impl<'a, T> MonitorGuard<'a, T> {
+    pub fn wait_no_handshake(&mut self) {
+        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
+        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
+        self.monitor.cvar.wait(&mut self.guard);
+        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
+        self.monitor.holder.store(holder, Ordering::Relaxed);
+    }
+
+    pub fn wait_for_no_handshake(&mut self, timeout: Duration) -> WaitTimeoutResult {
+        let rec_count = self.monitor.rec_count.swap(0, Ordering::Relaxed);
+        let holder = self.monitor.holder.swap(0, Ordering::Relaxed);
+        let result = self.monitor.cvar.wait_for(&mut self.guard, timeout);
+        self.monitor.rec_count.store(rec_count, Ordering::Relaxed);
+        self.monitor.holder.store(holder, Ordering::Relaxed);
+        result
+    }
+
+    pub fn notify(&self) {
+        self.monitor.cvar.notify_one();
+    }
+
+    pub fn notify_all(&self) {
+        self.monitor.cvar.notify_all();
+    }
+
+    pub fn monitor(&self) -> &Monitor<T> {
+        self.monitor
+    }
+
+    pub unsafe fn unlock_completely(&mut self) -> usize {
+        let result = self.monitor.rec_count.load(Ordering::Relaxed);
+        self.monitor.rec_count.store(0, Ordering::Relaxed);
+        self.monitor.holder.store(0, Ordering::Relaxed);
+        unsafe {
+            ManuallyDrop::drop(&mut self.guard);
+        }
+        result
+    }
+
+    pub fn wait_with_handshake<VM: VirtualMachine>(mut self) -> Self {
+        let t = Thread::<VM>::current();
+        t.context.save_thread_state();
+        let rec_count = parked_scope::<usize, VM>(|| {
+            self.wait_no_handshake();
+            let rec_count = unsafe { self.unlock_completely() };
+            rec_count
+        });
+        unsafe { self.monitor.relock_with_handshake::<VM>(rec_count) }
+    }
+}
+
+impl<'a, T> Drop for MonitorGuard<'a, T> {
+    fn drop(&mut self) {
+        if self.monitor.rec_count.fetch_sub(1, Ordering::Relaxed) == 1 {
+            self.monitor.holder.store(0, Ordering::Relaxed);
+            unsafe { ManuallyDrop::drop(&mut self.guard) };
+        }
+    }
+}
+
diff --git a/vmkit/src/semaphore.rs b/vmkit/src/sync/semaphore.rs
similarity index 100%
rename from vmkit/src/semaphore.rs
rename to vmkit/src/sync/semaphore.rs
diff --git a/vmkit/src/threading.rs b/vmkit/src/threading.rs
index e6e063b..8b09458 100644
--- a/vmkit/src/threading.rs
+++ b/vmkit/src/threading.rs
@@ -18,14 +18,14 @@ use mmtk::{
 use parking_lot::Once;
 
 use crate::{
+    machine_context::PlatformRegisters,
     mm::{
         conservative_roots::ConservativeRoots,
         stack_bounds::{current_stack_pointer, StackBounds},
         tlab::TLAB,
         AllocFastPath, MemoryManager,
     },
-    semaphore::Semaphore,
-    sync::{Monitor, MonitorGuard},
+    sync::{semaphore::Semaphore, Monitor, MonitorGuard},
     VirtualMachine,
 };
 
@@ -195,6 +195,7 @@ pub struct Thread<VM: VirtualMachine> {
     should_block_for_gc: AtomicBool,
 
     stack_pointer: Atomic<usize>,
+    platform_registers: UnsafeCell<MaybeUninit<PlatformRegisters>>,
     suspend_count: AtomicUsize,
     /// The monitor of the thread. Protects access to the thread's state.
     monitor: Monitor<()>,
@@ -264,6 +265,7 @@ impl<VM: VirtualMachine> Thread<VM> {
         should_block_for_gc: AtomicBool::new(false),
         monitor: Monitor::new(()),
         communication_lock: Monitor::new(()),
+        platform_registers: UnsafeCell::new(MaybeUninit::zeroed()),
     })
 }
 
@@ -445,6 +447,7 @@
         self.add_about_to_terminate();
     }
 
+    /// Start a main thread.
     pub fn main<F, R>(context: VM::ThreadContext, f: F) -> Option<R>
     where
         F: FnOnce() -> R + Send + 'static,
@@ -739,6 +742,40 @@
         self.check_block_no_save_context();
     }
 
+    /// Save the thread's registers.
+    pub fn save_registers(&self) {
+        assert_eq!(
+            self.platform_handle(),
+            Self::current().platform_handle(),
+            "attempt to save registers of another thread"
+        );
+        let sp = current_stack_pointer();
+        self.stack_pointer.store(sp.as_usize(), Ordering::Relaxed);
+        unsafe {
+            cfgenius::cond! {
+                if macro(crate::macros::have_machine_context) {
+                    let mut ucontext = MaybeUninit::<libc::ucontext_t>::uninit();
+                    libc::getcontext(ucontext.as_mut_ptr());
+                    let registers = crate::machine_context::registers_from_ucontext(ucontext.as_ptr()).read();
+                    self.platform_registers.get().write(MaybeUninit::new(registers));
+                } else {
+                    self.platform_registers.get().write(MaybeUninit::new(crate::machine_context::PlatformRegisters {
+                        stack_pointer: sp.as_usize() as _
+                    }));
+                }
+            }
+        }
+    }
+
+    /// Get the thread's registers.
+    ///
+    /// NOTE: Does not guarantee valid registers
+    /// nor that the returned value is the currently active registers.
+    pub fn get_registers(&self) -> PlatformRegisters {
+        unsafe { self.platform_registers.get().read().assume_init() }
+    }
+
     /// Return this thread's stack pointer.
     ///
     /// Note: Does not guarantee that the returned value is the currently active stack pointer.
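A brief usage sketch of the relocated monitor (illustrative only, not part of this patch): a GC-friendly job queue following the handshake rules from the `sync/monitor.rs` doc comment above. `MyVM` stands in for whatever `VirtualMachine` implementation the embedder provides.

use std::cell::RefCell;
use vmkit::sync::Monitor;

// Monitor only hands out shared references to the protected data, so interior
// mutability comes from a RefCell, exactly as the doc comment recommends.
static QUEUE: Monitor<RefCell<Vec<usize>>> = Monitor::new(RefCell::new(Vec::new()));

fn push_job(job: usize) {
    // Short, never-parking critical section: the no-handshake lock is fine
    // here, but the GC cannot stop this thread while the lock is held.
    let guard = QUEUE.lock_no_handshake();
    guard.borrow_mut().push(job);
    guard.notify();
}

fn pop_job() -> usize {
    // Reports this thread as blocked while it waits for the mutex, so a
    // concurrent GC is not stalled by us.
    let mut guard = QUEUE.lock_with_handshake::<MyVM>();
    loop {
        if let Some(job) = guard.borrow_mut().pop() {
            return job;
        }
        // Unlocks, parks inside parked_scope, then re-acquires with a handshake.
        guard = guard.wait_with_handshake::<MyVM>();
    }
}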
@@ -928,9 +965,15 @@ impl<VM: VirtualMachine> Thread<VM> {
         A::is_blocked(self)
     }
 
+    /// Change the thread state to `InNative` and store the current stack pointer.
+    ///
+    /// This method marks the thread as blocked for the thread system, so GC can run
+    /// concurrently with this thread. Note that native code *must* not access the GC heap,
+    /// as that can lead to undefined behavior.
     pub fn enter_native() {
         let t = Self::current();
-        t.stack_pointer.store(current_stack_pointer().as_usize(), Ordering::Relaxed);
+        t.stack_pointer
+            .store(current_stack_pointer().as_usize(), Ordering::Relaxed);
         let mut old_state;
         loop {
             old_state = t.get_exec_status();
@@ -947,6 +990,10 @@
         }
     }
 
+    /// Attempt to leave native state and return to managed state without blocking.
+    ///
+    /// If this method returns `false`, call [`leave_native`](Self::leave_native) to block the thread, or
+    /// spin-wait until this method returns `true`.
     #[must_use = "If thread can't leave native state without blocking, call [leave_native](Thread::leave_native) instead"]
     pub fn attempt_leave_native_no_block() -> bool {
         let t = Self::current();
@@ -966,12 +1013,16 @@
         }
     }
 
+    /// Leave native state and return to managed state.
+    ///
+    /// This method might block the thread if any block requests are pending.
     pub fn leave_native() {
         if !Self::attempt_leave_native_no_block() {
             Self::current().leave_native_blocked();
         }
     }
 
+    /// Unblock the thread and notify all waiting threads.
     pub fn unblock<A: BlockAdapter<VM>>(&self) {
         let lock = self.monitor.lock_no_handshake();
         A::clear_block_request(self);
@@ -980,10 +1031,12 @@
         drop(lock);
     }
 
+    /// Are yieldpoints enabled for this thread?
     pub fn yieldpoints_enabled(&self) -> bool {
         self.yieldpoints_enabled_count.load(Ordering::Relaxed) == 1
     }
 
+    /// Enable yieldpoints for this thread.
     pub fn enable_yieldpoints(&self) {
         let val = self
             .yieldpoints_enabled_count
@@ -997,6 +1050,7 @@
         }
     }
 
+    /// Disable yieldpoints for this thread.
     pub fn disable_yieldpoints(&self) {
         self.yieldpoints_enabled_count
             .fetch_sub(1, Ordering::Relaxed);
@@ -1197,7 +1251,7 @@ impl<VM: VirtualMachine> Thread<VM> {
         let thread = Thread::<VM>::current();
         let _was_at_yieldpoint = thread.at_yieldpoint.load(atomic::Ordering::Relaxed);
         thread.at_yieldpoint.store(true, atomic::Ordering::Relaxed);
-        
+
         // If thread is in critical section we can't do anything right now, defer
         // until later
         // we do this without acquiring locks, since part of the point of disabling
@@ -1275,6 +1329,18 @@ pub(crate) fn deinit_current_thread<VM: VirtualMachine>() {
         })
 }
 
+/// Thread management system. This type is responsible
+/// for registering and managing all threads in the system. When GC
+/// requests a thread to block, it will go through this type.
+///
+/// Threads are suspended in two ways:
+/// 1) Yieldpoints are requested: this assumes a cooperative runtime that
+///    periodically checks whether [`take_yieldpoint`](Thread::take_yieldpoint) is non-zero
+///    and, if so, calls into [`Thread::yieldpoint`].
+/// 2) Signals: this is used when the runtime is uncooperative and runs without yieldpoints. We deliver
+///    UNIX signals or Windows exceptions to suspend the thread. This method is less precise and does not
+///    account for locks held by the mutators, so it is less safe. It is highly discouraged to use the
+///    [`sync`](crate::sync) module in uncooperative mode.
 pub struct ThreadManager<VM: VirtualMachine> {
     inner: Monitor<RefCell<ThreadManagerInner<VM>>>,
     soft_handshake_left: AtomicUsize,
@@ -1447,7 +1513,8 @@ impl<VM: VirtualMachine> ThreadManager<VM> {
         // Deal with terminating threads to ensure that all threads are either dead to MMTk or stopped above.
         self.process_about_to_terminate();
 
-        let threads = self.inner
+        let threads = self
+            .inner
             .lock_no_handshake()
             .borrow()
             .threads
@@ -1465,7 +1532,7 @@
             }
         }
 
-        threads
+        threads
     } else {
         self.process_about_to_terminate();
         let mut handshake_threads = Vec::with_capacity(4);
@@ -1844,6 +1911,7 @@ static TARGET_THREAD: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut());
 
 extern "C-unwind" fn signal_handler_suspend_resume<VM: VirtualMachine>(
     _signal: i32,
     _info: *const libc::siginfo_t,
+    _ucontext: *mut libc::c_void,
 ) {
     let target = TARGET_THREAD.load(Ordering::Relaxed).cast::<Thread<VM>>();
     let thread = unsafe { target.as_ref().unwrap() };
@@ -1878,6 +1946,22 @@ extern "C-unwind" fn signal_handler_suspend_resume(
     thread
         .stack_pointer
         .store(approximate_stack_pointer.as_usize(), Ordering::Release);
+
+    cfgenius::cond! {
+        if macro(crate::macros::have_machine_context) {
+            let user_context = _ucontext.cast::<libc::ucontext_t>();
+            unsafe {
+                thread.platform_registers.get().write(MaybeUninit::new(crate::machine_context::registers_from_ucontext(user_context).read()));
+            }
+        } else {
+            unsafe {
+                thread.platform_registers.get().write(MaybeUninit::new(crate::machine_context::PlatformRegisters {
+                    stack_pointer: approximate_stack_pointer.as_usize() as *mut u8
+                }))
+            }
+        }
+    }
+
     // Allow suspend caller to see that this thread is suspended.
     // sem_post is async-signal-safe function. It means that we can call this from a signal handler.
     // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03
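Finally, a sketch of the native-state protocol documented in the threading.rs hunks above, wrapped around a blocking syscall. Hypothetical code, assuming the same `MyVM: VirtualMachine` stand-in as before; not part of this patch.

use vmkit::threading::Thread;

fn blocking_read(fd: i32, buf: &mut [u8]) -> isize {
    // InNative: from here on a GC may run concurrently, so nothing below may
    // touch the GC heap.
    Thread::<MyVM>::enter_native();
    let n = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len()) };
    // Tries the non-blocking fast path first and only parks the thread if a
    // block request (e.g. a GC handshake) arrived while we were in native code.
    Thread::<MyVM>::leave_native();
    n
}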