From 50977a4667be1670ed9de7756e344926f66952b0 Mon Sep 17 00:00:00 2001 From: playX18 Date: Wed, 12 Feb 2025 20:42:03 +0700 Subject: [PATCH] BDWGC Shim --- Cargo.lock | 357 ++------ Cargo.toml | 3 +- vmkit/Cargo.toml | 16 +- vmkit/examples/binarytrees.c | 65 ++ vmkit/examples/binarytrees.rs | 260 ++++++ vmkit/src/bdwgc_shim.rs | 835 ++++++++++++++++++ vmkit/src/build.rs | 34 + vmkit/src/lib.rs | 12 +- vmkit/src/machine_context.rs | 2 + vmkit/src/main.rs | 259 +----- vmkit/src/mm.rs | 13 +- vmkit/src/mm/conservative_roots.rs | 11 +- vmkit/src/mm/scanning.rs | 2 +- vmkit/src/mm/stack_bounds.rs | 4 +- vmkit/src/object_model.rs | 3 +- vmkit/src/object_model/object.rs | 42 +- .../binarytrees.rs => src/platform.rs} | 0 vmkit/src/platform/wrapper.h | 0 vmkit/src/semaphore.rs | 40 + vmkit/src/sync.rs | 1 + vmkit/src/threading.rs | 529 ++++++++--- 21 files changed, 1796 insertions(+), 692 deletions(-) create mode 100644 vmkit/examples/binarytrees.c create mode 100644 vmkit/examples/binarytrees.rs create mode 100644 vmkit/src/bdwgc_shim.rs create mode 100644 vmkit/src/build.rs create mode 100644 vmkit/src/machine_context.rs rename vmkit/{benches/binarytrees.rs => src/platform.rs} (100%) create mode 100644 vmkit/src/platform/wrapper.h create mode 100644 vmkit/src/semaphore.rs diff --git a/Cargo.lock b/Cargo.lock index c79290c..2677185 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11,12 +11,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - [[package]] name = "anstream" version = "0.6.18" @@ -113,12 +107,6 @@ dependencies = [ "git2", ] -[[package]] -name = "bumpalo" -version = "3.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" - [[package]] name = "bytemuck" version = "1.21.0" @@ -145,12 +133,6 @@ 
version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - [[package]] name = "cc" version = "1.2.12" @@ -168,33 +150,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - [[package]] name = "clap" version = "4.5.28" @@ -247,42 +202,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" -[[package]] -name = "criterion" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" -dependencies = [ - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "is-terminal", - "itertools 0.10.5", - "num-traits", - "once_cell", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] 
- -[[package]] -name = "criterion-plot" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" -dependencies = [ - "cast", - "itertools 0.10.5", -] - [[package]] name = "crossbeam" version = "0.8.4" @@ -339,12 +258,6 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" -[[package]] -name = "crunchy" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" - [[package]] name = "delegate" version = "0.12.0" @@ -477,16 +390,6 @@ dependencies = [ "url", ] -[[package]] -name = "half" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" -dependencies = [ - "cfg-if", - "crunchy", -] - [[package]] name = "heck" version = "0.5.0" @@ -667,15 +570,6 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.1" @@ -685,12 +579,6 @@ dependencies = [ "either", ] -[[package]] -name = "itoa" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" - [[package]] name = "jobserver" version = "0.1.32" @@ -700,16 +588,6 @@ dependencies = [ "libc", ] -[[package]] -name = "js-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -800,7 +678,7 @@ dependencies = [ "enum-map", "env_logger", "is-terminal", - "itertools 0.12.1", + "itertools", "lazy_static", "libc", "log", @@ -816,7 +694,7 @@ dependencies = [ "static_assertions", "strum", "strum_macros", - "sysinfo", + "sysinfo 0.30.13", ] [[package]] @@ -928,12 +806,6 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" -[[package]] -name = "oorandom" -version = "11.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" - [[package]] name = "parking_lot" version = "0.12.3" @@ -969,34 +841,6 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - [[package]] name = "portable-atomic" version = "1.10.0" @@ -1164,21 +1008,6 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" -[[package]] -name = "ryu" -version = "1.0.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - [[package]] name = "scopeguard" version = "1.2.0" @@ -1211,18 +1040,6 @@ dependencies = [ "syn 2.0.98", ] -[[package]] -name = "serde_json" -version = "1.0.138" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949" -dependencies = [ - "itoa", - "memchr", - "ryu", - "serde", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1325,7 +1142,21 @@ dependencies = [ "ntapi", "once_cell", "rayon", - "windows", + "windows 0.52.0", +] + +[[package]] +name = "sysinfo" +version = "0.33.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fc858248ea01b66f19d8e8a6d55f41deaf91e9d495246fd01368d99935c6c01" +dependencies = [ + "core-foundation-sys", + "libc", + "memchr", + "ntapi", + "rayon", + "windows 0.57.0", ] [[package]] @@ -1338,16 +1169,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "unicode-ident" version = "1.0.16" @@ -1403,7 +1224,6 @@ dependencies = [ "bytemuck", "cfg-if", "clap", - "criterion", "easy-bitfield", "env_logger", "errno", @@ -1412,6 +1232,7 @@ dependencies = [ "mmtk", "parking_lot", "rand", + "sysinfo 0.33.1", "winapi", ] @@ -1424,16 +1245,6 @@ dependencies = [ "syn 2.0.98", ] 
-[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - [[package]] name = "wasi" version = "0.13.3+wasi-0.2.2" @@ -1443,74 +1254,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "wasm-bindgen" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.98", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.98", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" @@ -1527,15 +1270,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" -[[package]] -name = "winapi-util" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" -dependencies = [ - "windows-sys", -] - [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -1548,7 +1282,17 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ - "windows-core", + "windows-core 0.52.0", + "windows-targets", +] + +[[package]] +name = "windows" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" +dependencies = [ + "windows-core 0.57.0", "windows-targets", ] @@ -1561,6 +1305,49 @@ dependencies = [ "windows-targets", ] +[[package]] +name = "windows-core" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ed2439a290666cd67ecce2b0ffaad89c2a56b976b736e6ece670297897832d" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-result", + "windows-targets", +] + +[[package]] +name = "windows-implement" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "windows-interface" +version = "0.57.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", +] + +[[package]] +name = "windows-result" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e383302e8ec8515204254685643de10811af0ed97ea37210dc26fb0032647f8" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.59.0" diff --git a/Cargo.toml b/Cargo.toml index 235f914..4f4ba3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,4 +4,5 @@ default-members = ["vmkit"] resolver = "2" [profile.release] -lto="fat" \ No newline at end of file +lto="fat" +debug=true \ No newline at end of file diff --git a/vmkit/Cargo.toml b/vmkit/Cargo.toml index 613d972..4a0d6c8 100644 --- a/vmkit/Cargo.toml +++ b/vmkit/Cargo.toml @@ -16,12 +16,18 @@ log = "0.4.25" mmtk = { git = "https://github.com/mmtk/mmtk-core" } parking_lot = "0.12.3" rand = "0.9.0" +sysinfo = "0.33.1" [features] -default = ["cooperative"] + + +default = ["uncooperative"] + +uncooperative = ["cooperative"] + # VMKit is built for use in cooperative runtime. Such runtime # would be able to use write barriers and safepoints. Such environment # must also provide precise object layout (stack can be uncooperative). @@ -34,9 +40,5 @@ full-precise = [] [target.'cfg(windows)'.dependencies] winapi = { version = "0.3.9", features = ["everything"] } -[dev-dependencies] -criterion = "0.5.1" - -[[bench]] -name = "binarytrees" -harness = false +[lib] +crate-type = ["cdylib", "rlib"] diff --git a/vmkit/examples/binarytrees.c b/vmkit/examples/binarytrees.c new file mode 100644 index 0000000..d0edd6e --- /dev/null +++ b/vmkit/examples/binarytrees.c @@ -0,0 +1,65 @@ +/* Binary trees implemented in C using the BDWGC API. When running in VMKit build it, then link with `libvmkit.so` instead of `libgc.so`. 
*/ + +#include +#include +#include + +typedef struct Node { + struct Node *left; + struct Node *right; +} Node; + +Node* leaf() { + return GC_malloc(sizeof(Node)); +} + +Node* new_node(Node* left, Node* right) { + Node* node = GC_malloc(sizeof(Node)); + node->left = left; + node->right = right; + return node; +} + +int itemCheck(Node* node) { + if (node->left == NULL) { + return 1; + } + return 1 + itemCheck(node->left) + itemCheck(node->right); +} + +Node* bottomUpTree(int depth) { + if (depth > 0) { + return new_node(bottomUpTree(depth - 1), bottomUpTree(depth - 1)); + } + return leaf(); +} + +int main() { + GC_use_entire_heap = 1; + GC_init(); + + + int maxDepth = 21; + int stretchDepth = maxDepth + 1; + int start = clock(); + Node* stretchTree = bottomUpTree(stretchDepth); + printf("stretch tree of depth %d\n", stretchDepth); + printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC); + + Node* longLivedTree = bottomUpTree(maxDepth); + + for (int d = 4; d <= maxDepth; d += 2) { + int iterations = 1 << (maxDepth - d + 4); + int check = 0; + for (int i = 0; i < iterations; i++) { + Node* treeNode = bottomUpTree(d); + check += itemCheck(treeNode); + } + printf("%d\t trees of depth %d\t check: %d\n", iterations, d, check); + } + + printf("long lived tree of depth %d\t check: %d\n", maxDepth, itemCheck(longLivedTree)); + printf("time: %f\n", ((double)clock() - start) / CLOCKS_PER_SEC); + + return 0; +} diff --git a/vmkit/examples/binarytrees.rs b/vmkit/examples/binarytrees.rs new file mode 100644 index 0000000..62dc149 --- /dev/null +++ b/vmkit/examples/binarytrees.rs @@ -0,0 +1,260 @@ +use mmtk::util::Address; +use mmtk::{util::options::PlanSelector, vm::slot::SimpleSlot, AllocationSemantics, MMTKBuilder}; +use std::cell::RefCell; +use std::mem::offset_of; +use std::sync::Arc; +use std::sync::OnceLock; +use vmkit::threading::parked_scope; +use vmkit::{ + mm::{traits::Trace, MemoryManager}, + object_model::{ + metadata::{GCMetadata, TraceCallback}, + 
object::VMKitObject, + }, + sync::Monitor, + threading::{GCBlockAdapter, Thread, ThreadContext}, + VMKit, VirtualMachine, +}; + +#[repr(C)] +struct Node { + left: NodeRef, + right: NodeRef, +} + +static METADATA: GCMetadata = GCMetadata { + trace: TraceCallback::TraceObject(|object, tracer| unsafe { + let node = object.as_address().as_mut_ref::(); + node.left.0.trace_object(tracer); + node.right.0.trace_object(tracer); + }), + instance_size: size_of::(), + compute_size: None, + alignment: 16, +}; + +struct BenchVM { + vmkit: VMKit, +} + +static VM: OnceLock = OnceLock::new(); + +struct ThreadBenchContext; + +impl ThreadContext for ThreadBenchContext { + fn new(_: bool) -> Self { + Self + } + fn save_thread_state(&self) {} + + fn scan_roots( + &self, + _factory: impl mmtk::vm::RootsWorkFactory<::Slot>, + ) { + } + + fn scan_conservative_roots( + &self, + _croots: &mut vmkit::mm::conservative_roots::ConservativeRoots, + ) { + } +} + +impl VirtualMachine for BenchVM { + type BlockAdapterList = (GCBlockAdapter, ()); + type Metadata = &'static GCMetadata; + type Slot = SimpleSlot; + type ThreadContext = ThreadBenchContext; + fn get() -> &'static Self { + VM.get().unwrap() + } + + fn vmkit(&self) -> &VMKit { + &self.vmkit + } + + fn prepare_for_roots_re_scanning() {} + + fn notify_initial_thread_scan_complete(partial_scan: bool, tls: mmtk::util::VMWorkerThread) { + let _ = partial_scan; + let _ = tls; + } + + fn forward_weak_refs( + _worker: &mut mmtk::scheduler::GCWorker>, + _tracer_context: impl mmtk::vm::ObjectTracerContext>, + ) { + } + + fn scan_roots_in_mutator_thread( + _tls: mmtk::util::VMWorkerThread, + _mutator: &'static mut mmtk::Mutator>, + _factory: impl mmtk::vm::RootsWorkFactory< + as mmtk::vm::VMBinding>::VMSlot, + >, + ) { + } + + fn scan_vm_specific_roots( + _tls: mmtk::util::VMWorkerThread, + _factory: impl mmtk::vm::RootsWorkFactory< + as mmtk::vm::VMBinding>::VMSlot, + >, + ) { + } +} + +#[repr(transparent)] +#[derive(Clone, Copy, PartialEq, Eq)] 
+struct NodeRef(VMKitObject); + +impl NodeRef { + pub fn new(thread: &Thread, left: NodeRef, right: NodeRef) -> Self { + let node = MemoryManager::::allocate( + thread, + size_of::(), + 16, + &METADATA, + AllocationSemantics::Default, + ); + + node.set_field_object::(offset_of!(Node, left), left.0); + node.set_field_object::(offset_of!(Node, right), right.0); + + Self(node) + } + + pub fn left(self) -> NodeRef { + unsafe { + let node = self.0.as_address().as_ref::(); + node.left + } + } + + pub fn right(self) -> NodeRef { + unsafe { + let node = self.0.as_address().as_ref::(); + node.right + } + } + + pub fn null() -> Self { + Self(VMKitObject::NULL) + } + + pub fn item_check(&self) -> usize { + if self.left() == NodeRef::null() { + 1 + } else { + 1 + self.left().item_check() + self.right().item_check() + } + } + + pub fn leaf(thread: &Thread) -> Self { + Self::new(thread, NodeRef::null(), NodeRef::null()) + } +} + +fn bottom_up_tree(thread: &Thread, depth: usize) -> NodeRef { + if thread.take_yieldpoint() != 0 { + Thread::::yieldpoint(0, Address::ZERO); + } + if depth > 0 { + NodeRef::new( + thread, + bottom_up_tree(thread, depth - 1), + bottom_up_tree(thread, depth - 1), + ) + } else { + NodeRef::leaf(thread) + } +} + +const MIN_DEPTH: usize = 4; + +fn main() { + env_logger::init(); + let nthreads = std::env::var("THREADS") + .unwrap_or("4".to_string()) + .parse::() + .unwrap(); + let mut builder = MMTKBuilder::new(); + builder.options.plan.set(PlanSelector::Immix); + builder.options.threads.set(nthreads); + builder + .options + .gc_trigger + .set(mmtk::util::options::GCTriggerSelector::DynamicHeapSize( + 4 * 1024 * 1024 * 1024, + 16 * 1024 * 1024 * 1024, + )); + VM.set(BenchVM { + vmkit: VMKit::new(&mut builder), + }) + .unwrap_or_else(|_| panic!()); + + Thread::::main(ThreadBenchContext, || { + let thread = Thread::::current(); + let start = std::time::Instant::now(); + let n = std::env::var("DEPTH") + .unwrap_or("18".to_string()) + .parse::() + .unwrap(); + 
let max_depth = if n < MIN_DEPTH + 2 { MIN_DEPTH + 2 } else { n }; + + let stretch_depth = max_depth + 1; + + println!("stretch tree of depth {stretch_depth}"); + + let _ = bottom_up_tree(&thread, stretch_depth); + let duration = start.elapsed(); + println!("time: {duration:?}"); + + let results = Arc::new(Monitor::new(vec![ + RefCell::new(String::new()); + (max_depth - MIN_DEPTH) / 2 + 1 + ])); + + let mut handles = Vec::new(); + + for d in (MIN_DEPTH..=max_depth).step_by(2) { + let depth = d; + + let thread = Thread::::for_mutator(ThreadBenchContext); + let results = results.clone(); + let handle = thread.start(move || { + let thread = Thread::::current(); + let mut check = 0; + + let iterations = 1 << (max_depth - depth + MIN_DEPTH); + for _ in 1..=iterations { + let tree_node = bottom_up_tree(&thread, depth); + check += tree_node.item_check(); + } + + *results.lock_with_handshake::()[(depth - MIN_DEPTH) / 2].borrow_mut() = + format!("{iterations}\t trees of depth {depth}\t check: {check}"); + }); + handles.push(handle); + } + println!("created {} threads", handles.len()); + + parked_scope::<(), BenchVM>(|| { + while let Some(handle) = handles.pop() { + handle.join().unwrap(); + } + }); + + for result in results.lock_with_handshake::().iter() { + println!("{}", result.borrow()); + } + + println!( + "long lived tree of depth {max_depth}\t check: {}", + bottom_up_tree(&thread, max_depth).item_check() + ); + + let duration = start.elapsed(); + println!("time: {duration:?}"); + }); +} diff --git a/vmkit/src/bdwgc_shim.rs b/vmkit/src/bdwgc_shim.rs new file mode 100644 index 0000000..9a975b2 --- /dev/null +++ b/vmkit/src/bdwgc_shim.rs @@ -0,0 +1,835 @@ +//! # BDWGC shim +//! +//! This file provides a shim for BDWGC APIs. It is used to provide a compatibility layer between MMTk and BDWGC. +//! +//! # Notes +//! +//! This shim is highly experimental and not all BDWGC APIs are implemented. 
+#![allow(non_upper_case_globals)] +use std::{ + collections::HashSet, + ffi::CStr, + mem::transmute, + sync::{Arc, Barrier, LazyLock, OnceLock}, +}; + +use crate::{ + mm::{conservative_roots::ConservativeRoots, traits::ToSlot, MemoryManager}, + object_model::{ + metadata::{GCMetadata, Metadata, TraceCallback}, + object::VMKitObject, + }, + threading::{GCBlockAdapter, Thread, ThreadContext}, + VMKit, VirtualMachine, +}; +use easy_bitfield::*; +use mmtk::{util::Address, vm::slot::SimpleSlot, AllocationSemantics, MMTKBuilder}; +use parking_lot::{Mutex, Once}; +use sysinfo::{MemoryRefreshKind, RefreshKind}; + +/// A BDWGC type that implements VirtualMachine. +pub struct BDWGC { + vmkit: VMKit, + roots: Mutex>, +} + +pub struct BDWGCThreadContext; + +impl ThreadContext for BDWGCThreadContext { + fn new(_: bool) -> Self { + Self + } + + fn save_thread_state(&self) {} + + fn scan_conservative_roots( + &self, + croots: &mut crate::mm::conservative_roots::ConservativeRoots, + ) { + let _ = croots; + } + + fn scan_roots( + &self, + factory: impl mmtk::vm::RootsWorkFactory<::Slot>, + ) { + let _ = factory; + } +} + +static BDWGC_VM: OnceLock = OnceLock::new(); + +impl VirtualMachine for BDWGC { + type ThreadContext = BDWGCThreadContext; + type BlockAdapterList = (GCBlockAdapter, ()); + type Metadata = BDWGCMetadata; + type Slot = SimpleSlot; + + fn get() -> &'static Self { + BDWGC_VM.get().expect("GC is not initialized") + } + + fn vmkit(&self) -> &VMKit { + &self.vmkit + } + + fn vm_live_bytes() -> usize { + 0 + } + + fn prepare_for_roots_re_scanning() {} + + fn forward_weak_refs( + _worker: &mut mmtk::scheduler::GCWorker>, + _tracer_context: impl mmtk::vm::ObjectTracerContext>, + ) { + } + + fn scan_roots_in_mutator_thread( + tls: mmtk::util::VMWorkerThread, + mutator: &'static mut mmtk::Mutator>, + factory: impl mmtk::vm::RootsWorkFactory< + as mmtk::vm::VMBinding>::VMSlot, + >, + ) { + let _ = tls; + let _ = mutator; + let _ = factory; + } + + fn scan_vm_specific_roots( 
+ tls: mmtk::util::VMWorkerThread, + mut factory: impl mmtk::vm::RootsWorkFactory< + as mmtk::vm::VMBinding>::VMSlot, + >, + ) { + let _ = tls; + let mut croots = ConservativeRoots::new(); + + unsafe { + croots.add_span(gc_data_start(), gc_data_end()); + } + + for (low, high) in BDWGC::get().roots.lock().iter() { + unsafe { + croots.add_span(*low, *high); + } + } + + croots.add_to_factory(&mut factory); + } + + fn notify_initial_thread_scan_complete(_partial_scan: bool, _tls: mmtk::util::VMWorkerThread) {} + + fn post_forwarding(_tls: mmtk::util::VMWorkerThread) {} + + fn out_of_memory(tls: mmtk::util::VMThread, err_kind: mmtk::util::alloc::AllocationError) { + let _ = tls; + let _ = err_kind; + + unsafe { + if let Some(oom_func) = OOM_FUNC { + oom_func(0); + } else { + eprintln!("Out of memory: {:?}", err_kind); + std::process::exit(1); + } + } + } +} + +type VTableAddress = BitField; +type IsAtomic = BitField; +#[allow(dead_code)] +type HasVTable = BitField; +/// Object size in words, overrides [VTableAddress] as if vtable is present, object size must be available +/// through it. +type ObjectSize = BitField; + +/// An object metadata. This allows GC to scan object fields. When you don't use `gcj` API and don't provide vtable +/// this type simply stores object size and whether it is ATOMIC or no. 
+pub struct BDWGCMetadata { + meta: usize, +} + +impl ToSlot for BDWGCMetadata { + fn to_slot(&self) -> Option { + None + } +} + +static CONSERVATIVE_METADATA: GCMetadata = GCMetadata { + alignment: 8, + instance_size: 0, + compute_size: Some(|object| { + let header = object.header::().metadata(); + ObjectSize::decode(header.meta) * BDWGC::MIN_ALIGNMENT + }), + + trace: TraceCallback::TraceObject(|object, tracer| unsafe { + let is_atomic = IsAtomic::decode(object.header::().metadata().meta); + if is_atomic { + return; + } + println!("trace {:?}", object.object_start::()); + let size = object.bytes_used::(); + + let mut cursor = object.object_start::(); + let end = cursor + size; + + while cursor < end { + let word = cursor.load::
(); + if let Some(object) = mmtk::memory_manager::find_object_from_internal_pointer(word, 128) + { + tracer.trace_object(object); + } + + cursor += BDWGC::MIN_ALIGNMENT; + } + }), +}; + +impl FromBitfield for BDWGCMetadata { + fn from_bitfield(value: u64) -> Self { + Self { + meta: value as usize, + } + } + + fn from_i64(value: i64) -> Self { + Self::from_bitfield(value as u64) + } +} + +impl ToBitfield for BDWGCMetadata { + fn to_bitfield(self) -> u64 { + self.meta as u64 + } + + fn one() -> Self { + Self::from_bitfield(1) + } + + fn zero() -> Self { + Self::from_bitfield(0) + } +} + +impl Metadata for BDWGCMetadata { + const METADATA_BIT_SIZE: usize = 58; + fn from_object_reference(_reference: mmtk::util::ObjectReference) -> Self { + todo!("GCJ-style metadata") + } + + fn to_object_reference(&self) -> Option { + todo!("GCJ-style metadata") + } + + fn is_object(&self) -> bool { + false + } + + fn gc_metadata(&self) -> &'static crate::object_model::metadata::GCMetadata { + &CONSERVATIVE_METADATA + } +} + +static INIT: Once = Once::new(); + +#[no_mangle] +pub static mut GC_VERBOSE: i32 = 0; + +static BUILDER: LazyLock> = LazyLock::new(|| Mutex::new(MMTKBuilder::new())); + +#[no_mangle] +pub extern "C-unwind" fn GC_get_parallel() -> libc::c_int { + *BUILDER.lock().options.threads as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_markers_count(count: libc::c_int) { + BUILDER.lock().options.threads.set(count as _); +} + +static mut OOM_FUNC: Option *mut libc::c_void> = None; + +#[no_mangle] +pub extern "C-unwind" fn GC_set_oom_fn(func: extern "C" fn(usize) -> *mut libc::c_void) { + unsafe { OOM_FUNC = Some(func) }; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_oom_fn() -> extern "C" fn(usize) -> *mut libc::c_void { + unsafe { OOM_FUNC.unwrap() } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_init() { + INIT.call_once(|| unsafe { + env_logger::init_from_env("GC_VERBOSE"); + + let mut builder = BUILDER.lock(); + builder + .options + .plan + 
.set(mmtk::util::options::PlanSelector::Immix); + if GC_use_entire_heap != 0 { + let mem = sysinfo::System::new_with_specifics( + RefreshKind::nothing().with_memory(MemoryRefreshKind::nothing().with_ram()), + ); + builder + .options + .gc_trigger + .set(mmtk::util::options::GCTriggerSelector::FixedHeapSize( + (mem.total_memory() as f64 * 0.5f64) as usize, + )); + } + + let vm = BDWGC { + vmkit: VMKit::new(&mut builder), + roots: Mutex::new(HashSet::new()), + }; + + BDWGC_VM.set(vm).unwrap_or_else(|_| { + eprintln!("GC already initialized"); + std::process::exit(1); + }); + + Thread::::register_mutator_manual(); + mmtk::memory_manager::initialize_collection( + &BDWGC::get().vmkit().mmtk, + transmute(Thread::::current()), + ) + }); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_register_mutator() { + unsafe { Thread::::register_mutator_manual() }; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_unregister_mutator() { + unsafe { Thread::::unregister_mutator_manual() }; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_pthread_create( + thread_ptr: &mut libc::pthread_t, + _: &libc::pthread_attr_t, + start_routine: extern "C" fn(*mut libc::c_void), + arg: *mut libc::c_void, +) -> libc::c_int { + let barrier = Arc::new(Barrier::new(1)); + let barrier2 = barrier.clone(); + + let thread = Thread::::for_mutator(BDWGCThreadContext); + let addr = Address::from_ref(thread_ptr); + let arg = Address::from_mut_ptr(arg); + thread.start(move || unsafe { + barrier2.wait(); + let thread = Thread::::current(); + addr.store(thread.platform_handle()); + start_routine(arg.to_mut_ptr()); + }); + + barrier.wait(); + + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_pthread_exit(retval: *mut libc::c_void) { + let thread = Thread::::current(); + unsafe { + thread.terminate(); + libc::pthread_exit(retval); + } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_pthread_join( + thread: libc::pthread_t, + retval: *mut *mut libc::c_void, +) -> libc::c_int { + unsafe { libc::pthread_join(thread, 
retval) } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_gcollect() { + MemoryManager::::request_gc(); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_find_leak(_: libc::c_int) {} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_find_leak() -> libc::c_int { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_all_interior_pointers(_: libc::c_int) {} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_all_interior_pointers() -> libc::c_int { + 1 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_finalize_on_demand(_: libc::c_int) {} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_finalize_on_demand() -> libc::c_int { + 0 +} + +#[no_mangle] +pub static mut GC_use_entire_heap: libc::c_int = 0; + +#[no_mangle] +pub extern "C-unwind" fn GC_set_full_freq(freq: libc::c_int) { + let _ = freq; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_full_freq() -> libc::c_int { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_non_gc_bytes(bytes: libc::c_ulong) { + let _ = bytes; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_non_gc_bytes() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_no_dls(_: libc::c_int) {} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_no_dls() -> libc::c_int { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_free_space_divisor(divisor: libc::c_ulong) { + let _ = divisor; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_free_space_divisor() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_max_retries(retries: libc::c_ulong) { + let _ = retries; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_max_retries() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub static mut GC_stackbottom: *mut libc::c_void = std::ptr::null_mut(); + +#[no_mangle] +pub static mut GC_dont_precollect: libc::c_int = 0; + +#[no_mangle] +pub extern "C-unwind" fn GC_set_dont_precollect(dont_precollect: libc::c_int) { + unsafe { GC_dont_precollect = dont_precollect }; +} + +#[no_mangle] +pub extern 
"C-unwind" fn GC_get_dont_precollect() -> libc::c_int { + unsafe { GC_dont_precollect } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_time_limit(limit: libc::c_ulong) { + let _ = limit; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_time_limit() -> libc::c_ulong { + 0 +} + +#[repr(C)] +#[allow(non_camel_case_types)] +pub struct GC_timeval_s { + tv_sec: libc::c_long, + tv_usec: libc::c_long, +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_time_limit_tv(limit: GC_timeval_s) { + let _ = limit; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_time_limit_tv() -> GC_timeval_s { + GC_timeval_s { + tv_sec: 0, + tv_usec: 0, + } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_allocd_bytes_per_finalizer(bytes: libc::c_ulong) { + let _ = bytes; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_allocd_bytes_per_finalizer() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_start_performance_measurement() {} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_full_gc_total_time() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_stopped_mark_total_time() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_avg_stopped_mark_time_ns() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_pages_executable(executable: libc::c_int) { + let _ = executable; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_pages_executable() -> libc::c_int { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_min_bytes_allocd(bytes: libc::c_ulong) { + let _ = bytes; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_min_bytes_allocd() -> libc::c_ulong { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_max_prior_attempts(attempts: libc::c_int) { + let _ = attempts; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_max_prior_attempts() -> libc::c_int { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_handle_fork(handle: libc::c_int) { + let _ = handle; +} + +#[no_mangle] 
+pub extern "C-unwind" fn GC_atfork_prepare() { + BDWGC::get().vmkit().mmtk.prepare_to_fork(); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_atfork_parent() { + let thread = Thread::::current(); + BDWGC::get() + .vmkit() + .mmtk + .after_fork(unsafe { transmute(thread) }); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_atfork_child() { + let thread = Thread::::current(); + BDWGC::get() + .vmkit() + .mmtk + .after_fork(unsafe { transmute(thread) }); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_is_init_called() -> libc::c_int { + INIT.state().done() as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_deinit() {} + +#[no_mangle] +pub extern "C-unwind" fn GC_malloc(size: usize) -> *mut libc::c_void { + let vtable = BDWGCMetadata { + meta: ObjectSize::encode(size / BDWGC::MIN_ALIGNMENT), + }; + + MemoryManager::::allocate( + Thread::::current(), + size, + BDWGC::MIN_ALIGNMENT, + vtable, + AllocationSemantics::Default, + ) + .as_address() + .to_mut_ptr() +} + +#[no_mangle] +pub extern "C-unwind" fn GC_malloc_atomic(size: usize) -> *mut libc::c_void { + let vtable = BDWGCMetadata { + meta: ObjectSize::encode(size / BDWGC::MIN_ALIGNMENT) | IsAtomic::encode(true), + }; + + MemoryManager::::allocate( + Thread::::current(), + size, + BDWGC::MIN_ALIGNMENT, + vtable, + AllocationSemantics::Default, + ) + .as_address() + .to_mut_ptr() +} + +#[no_mangle] +pub extern "C-unwind" fn GC_strdup(s: *const libc::c_char) -> *mut libc::c_char { + let s = unsafe { CStr::from_ptr(s) }; + let buf = s.to_string_lossy(); + let bytes = buf.as_bytes(); + let ns = GC_malloc_atomic(bytes.len()); + unsafe { + std::ptr::copy_nonoverlapping(bytes.as_ptr(), ns as *mut u8, bytes.len()); + } + ns.cast() +} + +#[no_mangle] +pub extern "C-unwind" fn GC_strndup(s: *const libc::c_char, n: usize) -> *mut libc::c_char { + let ns = GC_malloc_atomic(n); + unsafe { + std::ptr::copy_nonoverlapping(s, ns as *mut i8, n); + } + ns.cast() +} + +#[no_mangle] +pub extern "C-unwind" fn 
GC_malloc_uncollectable(size: usize) -> *mut libc::c_void { + let vtable = BDWGCMetadata { + meta: ObjectSize::encode(size / BDWGC::MIN_ALIGNMENT), + }; + + MemoryManager::::allocate( + Thread::::current(), + size, + BDWGC::MIN_ALIGNMENT, + vtable, + AllocationSemantics::Immortal, + ) + .as_address() + .to_mut_ptr() +} + +#[no_mangle] +pub extern "C-unwind" fn GC_free(ptr: *mut libc::c_void) { + let _ = ptr; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_malloc_stubborn(size: usize) -> *mut libc::c_void { + GC_malloc(size) +} + +#[no_mangle] +pub extern "C-unwind" fn GC_base(ptr: *mut libc::c_void) -> *mut libc::c_void { + match mmtk::memory_manager::find_object_from_internal_pointer(Address::from_mut_ptr(ptr), 128) { + Some(object) => object.to_raw_address().to_mut_ptr(), + None => std::ptr::null_mut(), + } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_is_heap_ptr(ptr: *mut libc::c_void) -> libc::c_int { + mmtk::memory_manager::is_mapped_address(Address::from_mut_ptr(ptr)) as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_size(ptr: *mut libc::c_void) -> libc::c_ulong { + let object = + mmtk::memory_manager::find_object_from_internal_pointer(Address::from_mut_ptr(ptr), 128); + match object { + Some(object) => VMKitObject::from(object).bytes_used::() as _, + None => 0, + } +} + +#[no_mangle] +pub extern "C-unwind" fn GC_realloc(old: *mut libc::c_void, size: usize) -> *mut libc::c_void { + let header = VMKitObject::from_address(Address::from_mut_ptr(old)) + .header::() + .metadata(); + let mem = if IsAtomic::decode(header.meta) { + GC_malloc_atomic(size) + } else { + GC_malloc(size) + }; + + unsafe { + std::ptr::copy_nonoverlapping(old.cast::(), mem as *mut u8, size); + } + + mem +} + +#[no_mangle] +pub extern "C-unwind" fn GC_exclude_static_roots(low: *mut libc::c_void, high: *mut libc::c_void) { + let _ = low; + let _ = high; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_clear_exclusion_table() {} + +#[no_mangle] +pub extern "C-unwind" fn GC_clear_roots() { 
+ BDWGC::get().roots.lock().clear(); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_add_roots(low: *mut libc::c_void, high: *mut libc::c_void) { + BDWGC::get() + .roots + .lock() + .insert((Address::from_mut_ptr(low), Address::from_mut_ptr(high))); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_remove_roots(low: *mut libc::c_void, high: *mut libc::c_void) { + BDWGC::get() + .roots + .lock() + .remove(&(Address::from_mut_ptr(low), Address::from_mut_ptr(high))); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_register_displacement(displacement: usize) { + let _ = displacement; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_debug_register_displacement(displacement: usize) { + let _ = displacement; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_gcollect_and_unmap() { + GC_gcollect(); +} + +#[no_mangle] +pub extern "C-unwind" fn GC_try_to_collect() -> libc::c_int { + MemoryManager::::request_gc() as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_stop_func(func: extern "C" fn() -> libc::c_int) { + let _ = func; +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_stop_func() -> Option libc::c_int> { + None +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_heap_size() -> libc::size_t { + mmtk::memory_manager::used_bytes(&BDWGC::get().vmkit().mmtk) as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_free_bytes() -> libc::size_t { + mmtk::memory_manager::free_bytes(&BDWGC::get().vmkit().mmtk) as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_unmapped_bytes() -> libc::size_t { + let mmtk = &BDWGC::get().vmkit().mmtk; + let total = mmtk::memory_manager::total_bytes(mmtk); + let used = mmtk::memory_manager::used_bytes(mmtk); + total - used +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_bytes_since_gc() -> libc::size_t { + let mmtk = &BDWGC::get().vmkit().mmtk; + let info = mmtk::memory_manager::live_bytes_in_last_gc(mmtk); + let total = info.iter().fold(0, |x, (_, stats)| stats.used_bytes + x); + total as _ +} + +#[no_mangle] +pub extern "C-unwind" 
fn GC_get_expl_freed_bytes_since_gc() -> libc::size_t { + 0 +} + +#[no_mangle] +pub extern "C-unwind" fn GC_get_total_bytes() -> libc::size_t { + let mmtk = &BDWGC::get().vmkit().mmtk; + mmtk::memory_manager::total_bytes(mmtk) as _ +} + +#[no_mangle] +pub extern "C-unwind" fn GC_malloc_ignore_off_page(size: usize) -> *mut libc::c_void { + GC_malloc(size) +} + +#[no_mangle] +pub extern "C-unwind" fn GC_malloc_atomic_ignore_off_page(size: usize) -> *mut libc::c_void { + GC_malloc_atomic(size) +} + +#[no_mangle] +pub extern "C-unwind" fn GC_set_warn_proc(_: *mut libc::c_void) {} + +cfg_if::cfg_if! { + if #[cfg(target_os="linux")] { + extern "C" { + static __data_start: *mut usize; + static __bss_start: *mut usize; + static _end: *mut usize; + } + + pub fn gc_data_start() -> Address { + unsafe { + println!("GC data start: {:p}", &__data_start); + Address::from_ptr(__data_start.cast::()) + } + } + + pub fn gc_data_end() -> Address { + unsafe { + Address::from_ptr(_end.cast::()) + } + } + + + + } +} diff --git a/vmkit/src/build.rs b/vmkit/src/build.rs new file mode 100644 index 0000000..0122784 --- /dev/null +++ b/vmkit/src/build.rs @@ -0,0 +1,34 @@ +use std::env; +use std::path::PathBuf; + +fn main() { + // Only generate bindings for macOS + if cfg!(target_os = "macos") { + // Tell cargo to invalidate the built crate whenever the wrapper changes + println!("cargo:rerun-if-changed=wrapper.h"); + + // The bindgen::Builder is the main entry point + // to bindgen, and lets you build up options for + // the resulting bindings. + let bindings = bindgen::Builder::default() + // The input header we would like to generate + // bindings for. + .header("wrapper.h") + // Tell bindgen to generate bindings for mach/thread_act.h + .clang_arg("-F/System/Library/Frameworks") + .allowlist_file("mach/thread_act.h") + // Tell cargo to invalidate the built crate whenever any of the + // included header files changed. 
+ .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + // Finish the builder and generate the bindings. + .generate() + // Unwrap the Result and panic on failure. + .expect("Unable to generate bindings"); + + // Write the bindings to the $OUT_DIR/bindings.rs file. + let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()); + bindings + .write_to_file(out_path.join("bindings.rs")) + .expect("Couldn't write bindings!"); + } +} \ No newline at end of file diff --git a/vmkit/src/lib.rs b/vmkit/src/lib.rs index 827e72c..997dd8a 100644 --- a/vmkit/src/lib.rs +++ b/vmkit/src/lib.rs @@ -1,15 +1,20 @@ use std::{marker::PhantomData, sync::atomic::AtomicBool}; use mm::{aslr::aslr_vm_layout, traits::SlotExtra, MemoryManager}; -use mmtk::{ MMTKBuilder, MMTK}; -use threading::ThreadManager; +use mmtk::{MMTKBuilder, MMTK}; +use threading::{initialize_threading, ThreadManager}; +pub mod machine_context; pub mod mm; pub mod object_model; pub mod options; +pub mod semaphore; pub mod sync; pub mod threading; +#[cfg(feature="uncooperative")] +pub mod bdwgc_shim; + pub trait VirtualMachine: Sized + 'static + Send + Sync { type ThreadContext: threading::ThreadContext; type BlockAdapterList: threading::BlockAdapterList; @@ -113,7 +118,8 @@ pub struct VMKit { } impl VMKit { - pub fn new(mut builder: MMTKBuilder) -> Self { + pub fn new(builder: &mut MMTKBuilder) -> Self { + initialize_threading::(); let vm_layout = aslr_vm_layout(&mut builder.options); builder.set_vm_layout(vm_layout); VMKit { diff --git a/vmkit/src/machine_context.rs b/vmkit/src/machine_context.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/vmkit/src/machine_context.rs @@ -0,0 +1,2 @@ + + diff --git a/vmkit/src/main.rs b/vmkit/src/main.rs index 8b3e3d0..f328e4d 100644 --- a/vmkit/src/main.rs +++ b/vmkit/src/main.rs @@ -1,258 +1 @@ -use mmtk::util::options::AffinityKind; -use mmtk::util::Address; -use mmtk::{util::options::PlanSelector, vm::slot::SimpleSlot, AllocationSemantics, MMTKBuilder}; 
-use std::cell::RefCell; -use std::sync::OnceLock; -use std::sync::Arc; -use vmkit::threading::parked_scope; -use vmkit::{ - mm::{traits::Trace, MemoryManager}, - object_model::{ - metadata::{GCMetadata, TraceCallback}, - object::VMKitObject, - }, - sync::Monitor, - threading::{GCBlockAdapter, Thread, ThreadContext}, - VMKit, VirtualMachine, -}; - -#[repr(C)] -struct Node { - left: NodeRef, - right: NodeRef, -} - -static METADATA: GCMetadata = GCMetadata { - trace: TraceCallback::TraceObject(|object, tracer| unsafe { - let node = object.as_address().as_mut_ref::(); - node.left.0.trace_object(tracer); - node.right.0.trace_object(tracer); - }), - instance_size: size_of::(), - compute_size: None, - alignment: 16, -}; - -struct BenchVM { - vmkit: VMKit, -} - -static VM: OnceLock = OnceLock::new(); - -struct ThreadBenchContext; - -impl ThreadContext for ThreadBenchContext { - fn new(_: bool) -> Self { - Self - } - fn save_thread_state(&self) {} - - fn scan_roots( - &self, - _factory: impl mmtk::vm::RootsWorkFactory<::Slot>, - ) { - } - - fn scan_conservative_roots( - &self, - _croots: &mut vmkit::mm::conservative_roots::ConservativeRoots, - ) { - } -} - -impl VirtualMachine for BenchVM { - type BlockAdapterList = (GCBlockAdapter, ()); - type Metadata = &'static GCMetadata; - type Slot = SimpleSlot; - type ThreadContext = ThreadBenchContext; - fn get() -> &'static Self { - VM.get().unwrap() - } - - fn vmkit(&self) -> &VMKit { - &self.vmkit - } - - fn prepare_for_roots_re_scanning() {} - - fn notify_initial_thread_scan_complete(partial_scan: bool, tls: mmtk::util::VMWorkerThread) { - let _ = partial_scan; - let _ = tls; - } - - fn forward_weak_refs( - _worker: &mut mmtk::scheduler::GCWorker>, - _tracer_context: impl mmtk::vm::ObjectTracerContext>, - ) { - } - - fn scan_roots_in_mutator_thread( - _tls: mmtk::util::VMWorkerThread, - _mutator: &'static mut mmtk::Mutator>, - _factory: impl mmtk::vm::RootsWorkFactory< - as mmtk::vm::VMBinding>::VMSlot, - >, - ) { - } - - fn 
scan_vm_specific_roots( - _tls: mmtk::util::VMWorkerThread, - _factory: impl mmtk::vm::RootsWorkFactory< - as mmtk::vm::VMBinding>::VMSlot, - >, - ) { - } -} - -#[repr(transparent)] -#[derive(Clone, Copy, PartialEq, Eq)] -struct NodeRef(VMKitObject); - -impl NodeRef { - pub fn new(thread: &Thread, left: NodeRef, right: NodeRef) -> Self { - let node = MemoryManager::::allocate( - thread, - size_of::(), - 16, - &METADATA, - AllocationSemantics::Default, - ); - unsafe { - let node = node.as_address().as_mut_ref::(); - node.left = left; - node.right = right; - } - Self(node) - } - - pub fn left(&self) -> NodeRef { - unsafe { - let node = self.0.as_address().as_ref::(); - node.left - } - } - - pub fn right(&self) -> NodeRef { - unsafe { - let node = self.0.as_address().as_ref::(); - node.right - } - } - - pub fn null() -> Self { - Self(VMKitObject::NULL) - } - - pub fn item_check(&self) -> usize { - if self.left() == NodeRef::null() { - 1 - } else { - 1 + self.left().item_check() + self.right().item_check() - } - } - - pub fn leaf(thread: &Thread) -> Self { - Self::new(thread, NodeRef::null(), NodeRef::null()) - } -} - -fn bottom_up_tree(thread: &Thread, depth: usize) -> NodeRef { - if thread.take_yieldpoint() != 0 { - Thread::::yieldpoint(0, Address::ZERO); - } - if depth > 0 { - NodeRef::new( - thread, - bottom_up_tree(thread, depth - 1), - bottom_up_tree(thread, depth - 1), - ) - } else { - NodeRef::leaf(thread) - } -} - -const MIN_DEPTH: usize = 4; - -fn main() { - env_logger::init(); - let nthreads = std::env::var("THREADS") - .unwrap_or("4".to_string()) - .parse::() - .unwrap(); - let mut builder = MMTKBuilder::new(); - builder.options.plan.set(PlanSelector::Immix); - builder.options.threads.set(nthreads); - builder.options.thread_affinity.set(AffinityKind::RoundRobin(vec![0, 1, 2, 3, 4, 5, 6, 7, 8])); - builder.options.gc_trigger.set(mmtk::util::options::GCTriggerSelector::DynamicHeapSize(1*1024*1024*1024, 3*1024*1024*1024)); - VM.set(BenchVM { - vmkit: 
VMKit::new(builder), - }) - .unwrap_or_else(|_| panic!()); - - Thread::::main(ThreadBenchContext, || { - let thread = Thread::::current(); - let start = std::time::Instant::now(); - let n = std::env::var("DEPTH") - .unwrap_or("18".to_string()) - .parse::() - .unwrap(); - let max_depth = if n < MIN_DEPTH + 2 { MIN_DEPTH + 2 } else { n }; - - let stretch_depth = max_depth + 1; - - println!("stretch tree of depth {stretch_depth}"); - - let _ = bottom_up_tree(&thread, stretch_depth); - let duration = start.elapsed(); - println!("time: {duration:?}"); - - let results = Arc::new(Monitor::new(vec![ - RefCell::new(String::new()); - (max_depth - MIN_DEPTH) / 2 + 1 - ])); - - let mut handles = Vec::new(); - - for d in (MIN_DEPTH..=max_depth).step_by(2) { - let depth = d; - - let thread = Thread::::for_mutator(ThreadBenchContext); - let results = results.clone(); - let handle = thread.start(move || { - let thread = Thread::::current(); - let mut check = 0; - - let iterations = 1 << (max_depth - depth + MIN_DEPTH); - for _ in 1..=iterations { - if thread.take_yieldpoint() != 0 { - Thread::::yieldpoint(0, Address::ZERO); - } - let tree_node = bottom_up_tree(&thread, depth); - check += tree_node.item_check(); - } - - *results.lock_with_handshake::()[(depth - MIN_DEPTH) / 2].borrow_mut() = - format!("{iterations}\t trees of depth {depth}\t check: {check}"); - }); - handles.push(handle); - } - - parked_scope::<(), BenchVM>(|| { - while let Some(handle) = handles.pop() { - handle.join().unwrap(); - } - }); - - for result in results.lock_with_handshake::().iter() { - println!("{}", result.borrow()); - } - - println!( - "long lived tree of depth {max_depth}\t check: {}", - bottom_up_tree(&thread, max_depth).item_check() - ); - - let duration = start.elapsed(); - println!("time: {duration:?}"); - }); -} +fn main() {} diff --git a/vmkit/src/mm.rs b/vmkit/src/mm.rs index 1d45907..14d684a 100644 --- a/vmkit/src/mm.rs +++ b/vmkit/src/mm.rs @@ -10,8 +10,8 @@ use crate::{ use 
easy_bitfield::{AtomicBitfieldContainer, ToBitfield}; use mmtk::{ util::{ - alloc::{AllocatorSelector, BumpAllocator, FreeListAllocator, ImmixAllocator}, - metadata::side_metadata::GLOBAL_SIDE_METADATA_BASE_ADDRESS, + alloc::{AllocatorSelector, BumpAllocator, ImmixAllocator}, + metadata::side_metadata::GLOBAL_SIDE_METADATA_VM_BASE_ADDRESS, VMMutatorThread, }, vm::{ @@ -147,13 +147,13 @@ impl MemoryManager { return object; } - return Self::allocate_slow(thread, size, alignment, metadata, semantics) + return Self::allocate_slow(thread, size, alignment, metadata, semantics); }, - _ => () + _ => (), }, - _ => () + _ => (), } Self::allocate_out_of_line(thread, orig_size, alignment, metadata, orig_semantics) @@ -388,10 +388,11 @@ impl MemoryManager { match thread.barrier() { BarrierSelector::ObjectBarrier => unsafe { let addr = src.as_address(); - let meta_addr = GLOBAL_SIDE_METADATA_BASE_ADDRESS + (addr >> 6); + let meta_addr = GLOBAL_SIDE_METADATA_VM_BASE_ADDRESS + (addr >> 6); let shift = (addr >> 3) & 0b111; let byte_val = meta_addr.load::(); if (byte_val >> shift) & 1 == 1 { + thread.mutator().barrier().object_reference_write_slow( src.as_object_unchecked(), slot, diff --git a/vmkit/src/mm/conservative_roots.rs b/vmkit/src/mm/conservative_roots.rs index e9e5f75..66e4f5d 100644 --- a/vmkit/src/mm/conservative_roots.rs +++ b/vmkit/src/mm/conservative_roots.rs @@ -29,14 +29,7 @@ impl ConservativeRoots { if pointer < starting_address || pointer > ending_address { return; } - - if self - .roots - .contains(unsafe { &ObjectReference::from_raw_address_unchecked(pointer) }) - { - return; - } - + let Some(start) = mmtk::memory_manager::find_object_from_internal_pointer( pointer, self.internal_pointer_limit, @@ -72,7 +65,7 @@ impl ConservativeRoots { } pub fn add_to_factory(&mut self, factory: &mut impl RootsWorkFactory) { - factory.create_process_tpinning_roots_work(std::mem::take( + factory.create_process_pinning_roots_work(std::mem::take( &mut 
self.roots.clone().into_iter().collect(), )); } diff --git a/vmkit/src/mm/scanning.rs b/vmkit/src/mm/scanning.rs index 9853395..abf7ca9 100644 --- a/vmkit/src/mm/scanning.rs +++ b/vmkit/src/mm/scanning.rs @@ -104,7 +104,7 @@ impl Scanning> for VMKitScanning { if OPTIONS.conservative_stacks { let mut croots = ConservativeRoots::new(); let bounds = *tls.stack_bounds(); - unsafe { croots.add_span(bounds.origin(), bounds.end()) }; + unsafe { croots.add_span(bounds.origin(), tls.stack_pointer()) }; tls.context.scan_conservative_roots(&mut croots); croots.add_to_factory(&mut factory); } diff --git a/vmkit/src/mm/stack_bounds.rs b/vmkit/src/mm/stack_bounds.rs index 6f68a7a..64a4ccb 100644 --- a/vmkit/src/mm/stack_bounds.rs +++ b/vmkit/src/mm/stack_bounds.rs @@ -159,7 +159,7 @@ impl StackBounds { fn current_thread_stack_bounds_internal() -> Self { let ret = unsafe { Self::new_thread_stack_bounds(libc::pthread_self()) }; - /*#[cfg(target_os = "linux")] + #[cfg(target_os = "linux")] unsafe { // on glibc, pthread_attr_getstack will generally return the limit size (minus a guard page) // for the main thread; this is however not necessarily always true on every libc - for example @@ -182,7 +182,7 @@ impl StackBounds { return Self { origin, bound }; } - }*/ + } ret } diff --git a/vmkit/src/object_model.rs b/vmkit/src/object_model.rs index 839f725..c1c6b82 100644 --- a/vmkit/src/object_model.rs +++ b/vmkit/src/object_model.rs @@ -132,7 +132,7 @@ impl ObjectModel> for VMKitObjectModel } impl VMKitObjectModel { fn move_object(from_obj: VMKitObject, mut to: MoveTarget, num_bytes: usize) -> VMKitObject { - log::trace!("move_object: from_obj: {}, to: {}", from_obj.as_address(), to); + log::trace!("move_object: from_obj: {}, to: {}, bytes={}", from_obj.as_address(), to, num_bytes); let mut copy_bytes = num_bytes; let mut obj_ref_offset = OBJECT_REF_OFFSET; let hash_state = from_obj.header::().hash_state(); @@ -140,6 +140,7 @@ impl VMKitObjectModel { // Adjust copy bytes and object 
reference offset based on hash state match hash_state { HashState::Hashed => { + copy_bytes -= size_of::(); // Exclude hash code from copy if let MoveTarget::ToAddress(ref mut addr) = to { *addr += size_of::(); // Adjust address for hash code diff --git a/vmkit/src/object_model/object.rs b/vmkit/src/object_model/object.rs index 1eca9ec..3e67bd0 100644 --- a/vmkit/src/object_model/object.rs +++ b/vmkit/src/object_model/object.rs @@ -138,6 +138,35 @@ impl VMKitObject { let metadata = self.header::().metadata().gc_metadata(); let overhead = self.hashcode_overhead::(); + let res = if metadata.instance_size != 0 { + raw_align_up( + metadata.instance_size + size_of::>(), + align_of::(), + ) + overhead + } else { + let Some(compute_size) = metadata.compute_size else { + panic!("compute_size is not set for object at {}", self.0); + }; + + raw_align_up( + compute_size(self) + size_of::>(), + align_of::(), + ) + overhead + }; + + res + } + + /// Returns the number of bytes required when the `VMKitObject` is copied. + /// + /// # Returns + /// + /// * `usize` - The number of bytes required. + #[inline(always)] + pub fn bytes_required_when_copied(self) -> usize { + let metadata = self.header::().metadata().gc_metadata(); + let overhead = self.hashcode_overhead::(); + if metadata.instance_size != 0 { raw_align_up( metadata.instance_size + size_of::>(), @@ -155,19 +184,6 @@ impl VMKitObject { } } - /// Returns the number of bytes required when the `VMKitObject` is copied. - /// - /// # Returns - /// - /// * `usize` - The number of bytes required. - #[inline(always)] - pub fn bytes_required_when_copied(&self) -> usize { - let metadata = self.header::().metadata().gc_metadata(); - let overhead = self.hashcode_overhead::(); - - raw_align_up(metadata.instance_size, align_of::()) + overhead - } - /// Returns the overhead for the hashcode of the `VMKitObject`. 
/// /// # Arguments diff --git a/vmkit/benches/binarytrees.rs b/vmkit/src/platform.rs similarity index 100% rename from vmkit/benches/binarytrees.rs rename to vmkit/src/platform.rs diff --git a/vmkit/src/platform/wrapper.h b/vmkit/src/platform/wrapper.h new file mode 100644 index 0000000..e69de29 diff --git a/vmkit/src/semaphore.rs b/vmkit/src/semaphore.rs new file mode 100644 index 0000000..e82872d --- /dev/null +++ b/vmkit/src/semaphore.rs @@ -0,0 +1,40 @@ +pub struct Semaphore { + platform_sema: libc::sem_t, +} + +impl Semaphore { + pub fn wait(&self) { + unsafe { + if libc::sem_wait(&self.platform_sema as *const _ as *mut _) != 0 { + panic!("sem_wait failed:{}", errno::errno()); + } + } + } + + pub fn post(&self) { + unsafe { + if libc::sem_post(&self.platform_sema as *const _ as *mut _) != 0 { + panic!("sem_post failed:{}", errno::errno()); + } + } + } + + pub fn new(initial_value: usize) -> Self { + let mut sema = std::mem::MaybeUninit::uninit(); + unsafe { + libc::sem_init(sema.as_mut_ptr(), 0, initial_value as u32); + } + + Self { + platform_sema: unsafe { sema.assume_init() }, + } + } +} + +impl Drop for Semaphore { + fn drop(&mut self) { + unsafe { + libc::sem_destroy(&self.platform_sema as *const _ as *mut _); + } + } +} diff --git a/vmkit/src/sync.rs b/vmkit/src/sync.rs index 68cc126..e10bf17 100644 --- a/vmkit/src/sync.rs +++ b/vmkit/src/sync.rs @@ -230,3 +230,4 @@ impl<'a, T> Drop for MonitorGuard<'a, T> { } } } + diff --git a/vmkit/src/threading.rs b/vmkit/src/threading.rs index 4f54779..3b2cf1d 100644 --- a/vmkit/src/threading.rs +++ b/vmkit/src/threading.rs @@ -3,8 +3,8 @@ use std::{ mem::{offset_of, MaybeUninit}, panic::AssertUnwindSafe, sync::{ - atomic::{AtomicBool, AtomicI32, AtomicI8, AtomicU64, AtomicUsize, Ordering}, - Arc, + atomic::{AtomicBool, AtomicI32, AtomicI8, AtomicPtr, AtomicU64, AtomicUsize, Ordering}, + Arc, LazyLock, }, thread::JoinHandle, }; @@ -15,13 +15,17 @@ use mmtk::{ vm::RootsWorkFactory, AllocationSemantics, 
BarrierSelector, Mutator, }; +use parking_lot::Once; use crate::{ mm::{ - conservative_roots::ConservativeRoots, stack_bounds::StackBounds, tlab::TLAB, + conservative_roots::ConservativeRoots, + stack_bounds::{current_stack_pointer, StackBounds}, + tlab::TLAB, AllocFastPath, MemoryManager, }, object_model::compression::CompressedOps, + semaphore::Semaphore, sync::{Monitor, MonitorGuard}, VirtualMachine, }; @@ -88,6 +92,11 @@ impl ThreadState { unsafe impl bytemuck::NoUninit for ThreadState {} +#[cfg(unix)] +pub type PlatformThreadHandle = libc::pthread_t; +#[cfg(windows)] +pub type PlatformThreadHandle = winapi::um::winnt::HANDLE; + pub trait ThreadContext { fn save_thread_state(&self); /// Scan roots in the thread. @@ -174,6 +183,7 @@ pub struct Thread { /// associated with the Thread. is_joinable: AtomicBool, thread_id: u64, + tid: AtomicU64, index_in_manager: AtomicUsize, yieldpoints_enabled_count: AtomicI8, @@ -184,10 +194,14 @@ pub struct Thread { is_blocked_for_gc: AtomicBool, should_block_for_gc: AtomicBool, + + stack_pointer: Atomic, + suspend_count: AtomicUsize, /// The monitor of the thread. Protects access to the thread's state. 
monitor: Monitor<()>, communication_lock: Monitor<()>, stack_bounds: OnceCell, + platform_handle: Cell, } unsafe impl Send for Thread {} @@ -222,12 +236,16 @@ impl Thread { } Arc::new(Self { + suspend_count: AtomicUsize::new(0), + stack_pointer: Atomic::new(0), + platform_handle: Cell::new(unsafe { std::mem::MaybeUninit::zeroed().assume_init() }), tlab: UnsafeCell::new(TLAB::new()), stack_bounds: OnceCell::new(), barrier: Cell::new(BarrierSelector::NoBarrier), alloc_fastpath: Cell::new(AllocFastPath::None), max_non_los_default_alloc_bytes: Cell::new(0), take_yieldpoint: AtomicI32::new(0), + tid: AtomicU64::new(0), context: ctx.unwrap_or_else(|| VM::ThreadContext::new(collector_context)), mmtk_mutator: UnsafeCell::new(MaybeUninit::uninit()), has_collector_context: AtomicBool::new(collector_context), @@ -266,22 +284,64 @@ impl Thread { } } - /*pub(crate) fn start_gc(self: &Arc, ctx: Box>>) { + pub unsafe fn register_mutator_manual() -> Arc { + let this = Self::for_mutator(VM::ThreadContext::new(false)); unsafe { - self.set_exec_status(ThreadState::InNative); - let this = self.clone(); - std::thread::spawn(move || { - let vmkit = VM::get().vmkit(); - init_current_thread(this.clone()); - vmkit.thread_manager().add_thread(this.clone()); - mmtk::memory_manager::start_worker( - &vmkit.mmtk, - mmtk::util::VMWorkerThread(this.to_vm_thread()), - ctx, - ); - }); + this.tid.store(libc::gettid() as _, Ordering::Relaxed); } - }*/ + init_current_thread(this.clone()); + let constraints = VM::get().vmkit().mmtk.get_plan().constraints(); + this.max_non_los_default_alloc_bytes + .set(constraints.max_non_los_default_alloc_bytes); + this.barrier.set(constraints.barrier); + let selector = mmtk::memory_manager::get_allocator_mapping( + &VM::get().vmkit().mmtk, + AllocationSemantics::Default, + ); + match selector { + AllocatorSelector::BumpPointer(_) | AllocatorSelector::Immix(_) => { + this.alloc_fastpath.set(AllocFastPath::TLAB); + } + + AllocatorSelector::FreeList(_) => { + 
this.alloc_fastpath.set(AllocFastPath::FreeList); + } + + _ => this.alloc_fastpath.set(AllocFastPath::None), + } + + this.stack_bounds + .set(StackBounds::current_thread_stack_bounds()) + .unwrap(); + let vmkit = VM::get().vmkit(); + if !this.is_collector_thread() && !this.ignore_handshakes_and_gc() { + let mutator = mmtk::memory_manager::bind_mutator( + &vmkit.mmtk, + VMMutatorThread(this.to_vm_thread()), + ); + unsafe { this.mmtk_mutator.get().write(MaybeUninit::new(*mutator)) }; + this.enable_yieldpoints(); + } + vmkit.thread_manager.add_thread(this.clone()); + unsafe { + let handle; + #[cfg(unix)] + { + handle = libc::pthread_self(); + } + #[cfg(windows)] + { + handle = winapi::um::processthreadsapi::GetCurrentThread(); + } + + this.platform_handle.set(handle); + } + this + } + pub unsafe fn unregister_mutator_manual() { + let current = Self::current(); + current.terminate(); + } /// Start execution of `self` by creating and starting a native thread. pub fn start(self: &Arc, f: F) -> JoinHandle> @@ -295,27 +355,6 @@ impl Thread { std::thread::spawn(move || this.startoff(f)) } } - - pub fn to_vm_thread(&self) -> VMThread { - unsafe { std::mem::transmute(self) } - } - - pub fn stack_bounds(&self) -> &StackBounds { - self.stack_bounds.get().unwrap() - } - - pub fn barrier(&self) -> BarrierSelector { - self.barrier.get() - } - - pub fn alloc_fastpath(&self) -> AllocFastPath { - self.alloc_fastpath.get() - } - - pub fn max_non_los_default_alloc_bytes(&self) -> usize { - self.max_non_los_default_alloc_bytes.get() - } - /// Begin execution of current thread by calling `run` method /// on the provided context. 
fn startoff(self: &Arc, f: F) -> Option @@ -323,6 +362,9 @@ impl Thread { F: FnOnce() -> R, R: Send + 'static, { + unsafe { + self.tid.store(libc::gettid() as _, Ordering::Relaxed); + } init_current_thread(self.clone()); let constraints = VM::get().vmkit().mmtk.get_plan().constraints(); self.max_non_los_default_alloc_bytes @@ -358,15 +400,47 @@ impl Thread { self.enable_yieldpoints(); } vmkit.thread_manager.add_thread(self.clone()); + unsafe { + let handle; + #[cfg(unix)] + { + handle = libc::pthread_self(); + } + #[cfg(windows)] + { + handle = winapi::um::processthreadsapi::GetCurrentThread(); + } + self.platform_handle.set(handle); + } let _result = std::panic::catch_unwind(AssertUnwindSafe(|| f())); - self.terminate(); + unsafe { self.terminate() }; _result.ok() } - fn terminate(&self) { + pub fn to_vm_thread(&self) -> VMThread { + unsafe { std::mem::transmute(self) } + } + + pub fn stack_bounds(&self) -> &StackBounds { + self.stack_bounds.get().unwrap() + } + + pub fn barrier(&self) -> BarrierSelector { + self.barrier.get() + } + + pub fn alloc_fastpath(&self) -> AllocFastPath { + self.alloc_fastpath.get() + } + + pub fn max_non_los_default_alloc_bytes(&self) -> usize { + self.max_non_los_default_alloc_bytes.get() + } + + pub unsafe fn terminate(&self) { self.is_joinable.store(true, Ordering::Relaxed); self.monitor.notify_all(); self.add_about_to_terminate(); @@ -661,10 +735,19 @@ impl Thread { /// and reacquire the lock, since there cannot be a race with broadcast() once /// we have committed to not calling wait() again. pub fn check_block(&self) { + self.stack_pointer + .store(current_stack_pointer().as_usize(), Ordering::Relaxed); self.context.save_thread_state(); self.check_block_no_save_context(); } + /// Return this thread's stack pointer. + /// + /// Note: Does not guarantee that the returned value is currently active stack pointer. 
+ pub fn stack_pointer(&self) -> Address { + unsafe { Address::from_usize(self.stack_pointer.load(Ordering::Relaxed)) } + } + fn enter_native_blocked_impl(&self) { let lock = self.monitor.lock_no_handshake(); @@ -768,7 +851,7 @@ impl Thread { current_thread::().thread_id, self.thread_id ); - + while A::has_block_request_with_token(self, token) && !A::is_blocked(self) && !self.is_about_to_terminate() @@ -803,6 +886,11 @@ impl Thread { current_thread::() } + pub fn platform_handle(&self) -> PlatformThreadHandle { + self.platform_handle.get() + } + + pub fn begin_pair_with<'a>( &'a self, other: &'a Thread, @@ -1195,6 +1283,7 @@ pub struct ThreadManager { soft_handshake_left: AtomicUsize, soft_handshake_data_lock: Monitor<()>, handshake_lock: Monitor>>>>, + thread_suspension: ThreadSuspension, } struct ThreadManagerInner { @@ -1220,6 +1309,11 @@ impl ThreadManager { soft_handshake_left: AtomicUsize::new(0), soft_handshake_data_lock: Monitor::new(()), handshake_lock: Monitor::new(RefCell::new(Vec::new())), + thread_suspension: if cfg!(feature = "uncooperative") { + ThreadSuspension::SignalBased + } else { + ThreadSuspension::Yieldpoint + }, } } @@ -1306,88 +1400,122 @@ impl ThreadManager { /// Fixpoint until there are no threads that we haven't blocked. Fixpoint is needed to /// catch the (unlikely) case that a thread spawns another thread while we are waiting. 
pub fn block_all_mutators_for_gc(&self) -> Vec>> { + if self.thread_suspension == ThreadSuspension::Yieldpoint { + let mut handshake_threads = Vec::with_capacity(4); + loop { + let lock = self.inner.lock_no_handshake(); + let lock = lock.borrow(); + // (1) find all threads that need to be blocked for GC - let mut handshake_threads = Vec::with_capacity(4); - loop { + for i in 0..lock.threads.len() { + if let Some(t) = lock.threads[i].clone() { + if !t.is_collector_thread() && !t.ignore_handshakes_and_gc() { + handshake_threads.push(t.clone()); + } + } + } + + drop(lock); + // (2) Remove any threads that have already been blocked from the list. + handshake_threads.retain(|t| { + let lock = t.monitor().lock_no_handshake(); + if t.is_blocked_for::() + || t.block_unchecked::(true).not_running() + { + drop(lock); + false + } else { + drop(lock); + true + } + }); + + // (3) Quit trying to block threads if all threads are either blocked + // or not running (a thread is "not running" if it is NEW or TERMINATED; + // in the former case it means that the thread has not had start() + // called on it while in the latter case it means that the thread + // is either in the TERMINATED state or is about to be in that state + // real soon now, and will not perform any heap-related work before + // terminating). + if handshake_threads.is_empty() { + break; + } + // (4) Request a block for GC from all other threads. + while let Some(thread) = handshake_threads.pop() { + let lock = thread.monitor().lock_no_handshake(); + thread.block_unchecked::(false); + drop(lock); + } + } + // Deal with terminating threads to ensure that all threads are either dead to MMTk or stopped above. 
+ self.process_about_to_terminate(); + + self.inner + .lock_no_handshake() + .borrow() + .threads + .iter() + .flatten() + .filter(|t| t.is_blocked_for::()) + .cloned() + .collect::>() + } else { + self.process_about_to_terminate(); + let mut handshake_threads = Vec::with_capacity(4); + for thread in self.threads() { + if !thread.is_collector_thread() + && !thread.ignore_handshakes_and_gc() + && !thread.is_about_to_terminate() + { + let locker = ThreadSuspendLocker::new(); + + unsafe { + thread.suspend(&locker); + } + + handshake_threads.push(thread.clone()); + } + } + self.process_about_to_terminate(); + handshake_threads + } + } + + /// Unblock all mutators blocked for GC. + pub fn unblock_all_mutators_for_gc(&self) { + if self.thread_suspension == ThreadSuspension::Yieldpoint { + let mut handshake_threads = Vec::with_capacity(4); let lock = self.inner.lock_no_handshake(); let lock = lock.borrow(); - // (1) find all threads that need to be blocked for GC - for i in 0..lock.threads.len() { - if let Some(t) = lock.threads[i].clone() { - if !t.is_collector_thread() && !t.ignore_handshakes_and_gc() { - handshake_threads.push(t.clone()); + for thread in lock.threads.iter() { + if let Some(thread) = thread { + if !thread.is_collector_thread() { + handshake_threads.push(thread.clone()); } } } drop(lock); - // (2) Remove any threads that have already been blocked from the list. 
- handshake_threads.retain(|t| { - let lock = t.monitor().lock_no_handshake(); - if t.is_blocked_for::() - || t.block_unchecked::(true).not_running() - { - drop(lock); - false - } else { - drop(lock); - true - } - }); - // (3) Quit trying to block threads if all threads are either blocked - // or not running (a thread is "not running" if it is NEW or TERMINATED; - // in the former case it means that the thread has not had start() - // called on it while in the latter case it means that the thread - // is either in the TERMINATED state or is about to be in that state - // real soon now, and will not perform any heap-related work before - // terminating). - if handshake_threads.is_empty() { - break; - } - // (4) Request a block for GC from all other threads. while let Some(thread) = handshake_threads.pop() { let lock = thread.monitor().lock_no_handshake(); - thread.block_unchecked::(false); + thread.unblock::(); drop(lock); } - } - // Deal with terminating threads to ensure that all threads are either dead to MMTk or stopped above. - self.process_about_to_terminate(); - - self.inner - .lock_no_handshake() - .borrow() - .threads - .iter() - .flatten() - .filter(|t| t.is_blocked_for::()) - .cloned() - .collect::>() - } - - /// Unblock all mutators blocked for GC. 
- pub fn unblock_all_mutators_for_gc(&self) { - let mut handshake_threads = Vec::with_capacity(4); - let lock = self.inner.lock_no_handshake(); - let lock = lock.borrow(); - - for thread in lock.threads.iter() { - if let Some(thread) = thread { - if !thread.is_collector_thread() { - handshake_threads.push(thread.clone()); + } else { + for thread in self.threads() { + if !thread.is_collector_thread() && !thread.ignore_handshakes_and_gc() { + let locker = ThreadSuspendLocker::new(); + unsafe { + thread.resume(&locker); + } + if thread.is_blocked_for::() { + thread.unblock::(); + } } } } - - drop(lock); - - while let Some(thread) = handshake_threads.pop() { - let lock = thread.monitor().lock_no_handshake(); - thread.unblock::(); - drop(lock); - } } } @@ -1612,3 +1740,192 @@ impl>) -> bool> SoftHandshakeVisitor { + guard: MonitorGuard<'a, ()>, +} + +impl<'a> ThreadSuspendLocker<'a> { + pub fn new() -> Self { + Self { + guard: THREAD_SUSPEND_MONITOR.lock_no_handshake(), + } + } +} + +impl Thread { + /// Suspend the thread by sending a signal to it. + /// + /// This function internally uses `pthread_kill`. + /// + /// # SAFETY + /// + /// Suspends thread at random point of execution, does not guarantee any thread state consistency + /// and will not release any held locks, might trigger memory leaks or segfaults. Use at your own risk. + pub unsafe fn suspend(&self, _locker: &ThreadSuspendLocker<'_>) -> bool { + if self.suspend_count.load(Ordering::Relaxed) == 0 { + while self.platform_handle.get() == 0 { + std::thread::yield_now(); // spin wait for thread to be established + } + TARGET_THREAD.store(self as *const Self as *mut _, Ordering::Relaxed); + unsafe { + loop { + // We must use pthread_kill to avoid queue-overflow problem with real-time signals. 
+                let result =
+                    libc::pthread_kill(self.platform_handle.get(), SIG_THREAD_SUSPEND_RESUME);
+                if result != 0 {
+                    return false;
+                }
+
+                GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME.wait();
+                if self.stack_pointer.load(Ordering::Relaxed) != 0 {
+                    break;
+                }
+                // Because of an alternative signal stack, we failed to suspend this thread.
+                // Retry suspension again after yielding.
+                std::thread::yield_now();
+            }
+        }
+    }
+
+        self.suspend_count.fetch_add(1, Ordering::Relaxed);
+        true
+    }
+
+    /// Resume thread from suspended state.
+    ///
+    /// # SAFETY
+    ///
+    /// Resumes thread at random point of execution, does not guarantee any thread state consistency
+    /// and will not release any held locks, might trigger memory leaks or segfaults. Use at your own risk.
+    pub unsafe fn resume(&self, _locker: &ThreadSuspendLocker<'_>) {
+        if self.suspend_count.load(Ordering::Relaxed) == 1 {
+            // When allowing sigThreadSuspendResume interrupt in the signal handler by sigsuspend and SigThreadSuspendResume is actually issued,
+            // the signal handler itself will be called once again.
+            // There are several ways to distinguish the handler invocation for suspend and resume.
+            // 1. Use different signal numbers. And check the signal number in the handler.
+            // 2. Use some arguments to distinguish suspend and resume in the handler.
+            // 3. Use thread's flag.
+            // In this implementation, we take (3). suspend_count is used to distinguish it.
+            // Note that we must use pthread_kill to avoid queue-overflow problem with real-time signals.
+            TARGET_THREAD.store(self as *const Self as *mut _, Ordering::Relaxed);
+            unsafe {
+                if libc::pthread_kill(self.platform_handle.get(), SIG_THREAD_SUSPEND_RESUME)
+                    == libc::ESRCH
+                {
+                    return;
+                }
+
+                GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME.wait();
+            }
+        }
+
+        self.suspend_count.fetch_sub(1, Ordering::Relaxed);
+    }
+}
+
+/* thread signal handlers for STW implementation
+
+   NOTE: DO NOT USE `Drop` types in the code! It might not be cleaned up properly.
+*/ + +static TARGET_THREAD: AtomicPtr<()> = AtomicPtr::new(std::ptr::null_mut()); + +extern "C-unwind" fn signal_handler_suspend_resume( + _signal: i32, + _info: *const libc::siginfo_t, +) { + let target = TARGET_THREAD.load(Ordering::Relaxed).cast::>(); + let thread = unsafe { target.as_ref().unwrap() }; + // empty GC caches if there's any. + unsafe { + MemoryManager::::flush_tlab(thread); + } + if thread.suspend_count.load(Ordering::Relaxed) != 0 { + // This is signal handler invocation that is intended to be used to resume sigsuspend. + // So this handler invocation itself should not process. + // + // When signal comes, first, the system calls signal handler. And later, sigsuspend will be resumed. Signal handler invocation always precedes. + // So, the problem never happens that suspended.store(true, ...) will be executed before the handler is called. + // http://pubs.opengroup.org/onlinepubs/009695399/functions/sigsuspend.html + return; + } + + let approximate_stack_pointer = current_stack_pointer(); + if !thread.stack_bounds().contains(approximate_stack_pointer) { + // This happens if we use an alternative signal stack. + // 1. A user-defined signal handler is invoked with an alternative signal stack. + // 2. In the middle of the execution of the handler, we attempt to suspend the target thread. + // 3. A nested signal handler is executed. + // 4. The stack pointer saved in the machine context will be pointing to the alternative signal stack. + // In this case, we back off the suspension and retry a bit later. + thread.stack_pointer.store(0, Ordering::Relaxed); + GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME.post(); + + return; + } + thread + .stack_pointer + .store(approximate_stack_pointer.as_usize(), Ordering::Release); + // Allow suspend caller to see that this thread is suspended. + // sem_post is async-signal-safe function. It means that we can call this from a signal handler. 
+ // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html#tag_02_04_03 + // + // And sem_post emits memory barrier that ensures that stack_pointer is correctly saved. + // http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_11 + GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME.post(); + + // Reaching here, sigThreadSuspendResume is blocked in this handler (this is configured by sigaction's sa_mask). + // So before calling sigsuspend, sigThreadSuspendResume to this thread is deferred. This ensures that the handler is not executed recursively. + let mut blocked_signal_set = std::mem::MaybeUninit::uninit(); + thread.acknowledge_block_requests(); + unsafe { + libc::sigfillset(blocked_signal_set.as_mut_ptr()); + libc::sigdelset(blocked_signal_set.as_mut_ptr(), SIG_THREAD_SUSPEND_RESUME); + libc::sigsuspend(blocked_signal_set.as_mut_ptr()); + } + + let target = TARGET_THREAD.load(Ordering::Relaxed).cast::>(); + let thread = unsafe { target.as_ref().unwrap() }; + // Allow resume caller to see that this thread is resumed. 
+ thread.stack_pointer.store(0, Ordering::Relaxed); + GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME.post(); +} + +pub(crate) fn initialize_threading() { + static ONCE: Once = Once::new(); + + ONCE.call_once(|| unsafe { + let mut action: libc::sigaction = std::mem::MaybeUninit::zeroed().assume_init(); + + libc::sigemptyset(&mut action.sa_mask); + libc::sigaddset(&mut action.sa_mask, SIG_THREAD_SUSPEND_RESUME); + + action.sa_sigaction = signal_handler_suspend_resume:: as usize; + action.sa_flags = libc::SA_RESTART | libc::SA_SIGINFO; + + let res = libc::sigaction(SIG_THREAD_SUSPEND_RESUME, &action, std::ptr::null_mut()); + if res != 0 { + eprintln!("failed to install signal handler for SIG_THREAD_SUSPEND_RESUME"); + std::process::abort(); + } + }); +} + +pub(crate) static THREAD_SUSPEND_MONITOR: Monitor<()> = Monitor::new(()); +pub(crate) static GLOBAL_SEMAPHORE_FOR_SUSPEND_RESUME: LazyLock = + LazyLock::new(|| Semaphore::new(0)); + +/// Thread suspension method. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ThreadSuspension { + /// Thread suspension is done by periodically calling [`Thread::yieldpoint`]. This is the most + /// precise method and also is the most safe one: we are in absolute control of thread state in this mode. + Yieldpoint, + /// Thread suspension is done by sending a signal to the thread. This is less precise and less safe: + /// we are not in absolute control of thread state in this mode. + SignalBased, +}