background_hang_monitor: ensure workers run until monitored components do (#38322)

Shut-down of the background hang monitor (BHM) is currently problematic:

- it does not always run until the monitored script-thread does (see
"BackgroundHangMonitor has gone away" mentioned in
https://github.com/servo/servo/issues/34158).
- it shuts down before the constellation (good, so
https://github.com/servo/servo/issues/24850 was actually "fixed", but in a way
that introduced a new problem), but using a mechanism that allows it to
shut down before script (the problem above).
- there are various mechanisms (see the doc comments removed by this PR)
in place which are meant to ensure a clean shutdown despite the above
problems; those are complicated, and become unnecessary once those
problems are fixed.

All of the above is fixed by the changes in this PR, which ensure the
BHM does not shut down before script, and also maintain the invariant
that it must shut down before the constellation (in single-process mode)
or before the main thread (in multi-process mode), but using a mechanism
which allows it to keep running until script shuts down.

An unnecessary `Option` around the exit signal is also removed.

As a positive side-effect, it also ensures that any script-thread is
shut down before the constellation (because for the BHM worker to exit,
the monitored script must have exited first), so this should also fix a
host of other problems noted in
https://github.com/servo/servo/issues/30849, but each should be
confirmed independently (and various other improvements seem possible in
their specific contexts, such as joining on script threads, and removing
`ScriptThreadMessage::ExitScriptThread`).

Fixes: https://github.com/servo/servo/issues/24850 and part of
https://github.com/servo/servo/issues/34158

Testing: Unit tests in `components/background_hang_monitor/tests`. Also
manually tested loading "about:blank" in single- and multi-process mode.

---------

Signed-off-by: gterzian <2792687+gterzian@users.noreply.github.com>
This commit is contained in:
Gregory Terzian 2025-07-30 21:03:28 +08:00 committed by GitHub
parent e5334a64c4
commit 815ed10b5f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 147 additions and 202 deletions

View file

@ -92,6 +92,7 @@ use std::marker::PhantomData;
use std::mem::replace;
use std::rc::{Rc, Weak};
use std::sync::{Arc, Mutex};
use std::thread::JoinHandle;
use std::{process, thread};
use background_hang_monitor::HangMonitorRegister;
@ -281,6 +282,9 @@ pub struct Constellation<STF, SWF> {
/// None when in multiprocess mode.
background_monitor_register: Option<Box<dyn BackgroundHangMonitorRegister>>,
/// In single process mode, a join handle on the BHM worker thread.
background_monitor_register_join_handle: Option<JoinHandle<()>>,
/// Channels to control all background-hang monitors.
/// TODO: store them on the relevant BrowsingContextGroup,
/// so that they could be controlled on a "per-tab/event-loop" basis.
@ -598,23 +602,28 @@ where
// If we are in multiprocess mode,
// a dedicated per-process hang monitor will be initialized later inside the content process.
// See run_content_process in servo/lib.rs
let (background_monitor_register, background_hang_monitor_control_ipc_senders) =
if opts::get().multiprocess {
(None, vec![])
} else {
let (
background_hang_monitor_control_ipc_sender,
background_hang_monitor_control_ipc_receiver,
) = ipc::channel().expect("ipc channel failure");
(
Some(HangMonitorRegister::init(
background_hang_monitor_ipc_sender.clone(),
background_hang_monitor_control_ipc_receiver,
opts::get().background_hang_monitor,
)),
vec![background_hang_monitor_control_ipc_sender],
)
};
let (
background_monitor_register,
background_monitor_register_join_handle,
background_hang_monitor_control_ipc_senders,
) = if opts::get().multiprocess {
(None, None, vec![])
} else {
let (
background_hang_monitor_control_ipc_sender,
background_hang_monitor_control_ipc_receiver,
) = ipc::channel().expect("ipc channel failure");
let (register, join_handle) = HangMonitorRegister::init(
background_hang_monitor_ipc_sender.clone(),
background_hang_monitor_control_ipc_receiver,
opts::get().background_hang_monitor,
);
(
Some(register),
Some(join_handle),
vec![background_hang_monitor_control_ipc_sender],
)
};
let swmanager_receiver =
route_ipc_receiver_to_new_crossbeam_receiver_preserving_errors(
@ -639,6 +648,7 @@ where
background_hang_monitor_sender: background_hang_monitor_ipc_sender,
background_hang_monitor_receiver,
background_monitor_register,
background_monitor_register_join_handle,
background_monitor_control_senders: background_hang_monitor_control_ipc_senders,
script_receiver,
compositor_receiver,
@ -2440,15 +2450,13 @@ where
// even when currently hanging(on JS or sync XHR).
// This must be done before starting the process of closing all pipelines.
for chan in &self.background_monitor_control_senders {
let (exit_ipc_sender, exit_ipc_receiver) =
ipc::channel().expect("Failed to create IPC channel!");
if let Err(e) = chan.send(BackgroundHangMonitorControlMsg::Exit(exit_ipc_sender)) {
// Note: the bhm worker thread will continue to run
// until all monitored components have exited,
// at which point we can join on the thread(done in `handle_shutdown`).
if let Err(e) = chan.send(BackgroundHangMonitorControlMsg::Exit) {
warn!("error communicating with bhm: {}", e);
continue;
}
if exit_ipc_receiver.recv().is_err() {
warn!("Failed to receive exit confirmation from BHM.");
}
}
// Close the top-level browsing contexts
@ -2508,6 +2516,14 @@ where
fn handle_shutdown(&mut self) {
debug!("Handling shutdown.");
// In single process mode, join on the background hang monitor worker thread.
drop(self.background_monitor_register.take());
if let Some(join_handle) = self.background_monitor_register_join_handle.take() {
join_handle
.join()
.expect("Failed to join on the BHM background thread.");
}
// At this point, there are no active pipelines,
// so we can safely block on other threads, without worrying about deadlock.
// Channels to receive signals when threads are done exiting.