background_hang_monitor: ensure workers run until monitored components do (#38322)

Shut-down of the background hang monitor(bhm) is currently  problematic:

- it does not always run until the monitored script-thread does(see
"BackgroundHangMonitor has gone away" mentioned in
https://github.com/servo/servo/issues/34158).
- it shuts-down before the constellation(good, so actually
https://github.com/servo/servo/issues/24850 was "fixed" but in a way
that introduced a new problem), but using a mechanism that allows it to
shutdown before script(the problem above).
- there are various mechanism(see the doc comments removed by this PR)
in place which are meant to ensure a clean shutdown despite the above
problems; those are complicated, and become unnecessary once those
problems are fixed.

All of the above is fixed by the changes in this PR, which ensure the
bhm does not shut-down before script, and also maintains the invariant
that it must shut-down before the constellation(in single-process mode)
or before the main thread(in multi-process mode), but using a mechanism
which allows it to keep running until script shuts-down.

An unnecessary option around the exit signal is also removed.

As a positive side-effect, it also ensures that any script-thread is
shut-down before the constellation(because for the bhm worker to exit,
the monitored script must have exited first), so this should also fix a
host of other problems noted in
https://github.com/servo/servo/issues/30849, but each should be
confirmed independently(and various other improvements seem possible in
their specific contexts, such as joining on script threads, and removing
the `ScriptThreadMessage::ExitScriptThread`).

Fixes: https://github.com/servo/servo/issues/24850 and part of
https://github.com/servo/servo/issues/34158

Testing: Unit tests in `component/background_hang_monitor/tests`. Also
manually tested loading "about-blank" in single- and multi-process mode.

---------

Signed-off-by: gterzian <2792687+gterzian@users.noreply.github.com>
This commit is contained in:
Gregory Terzian 2025-07-30 21:03:28 +08:00 committed by GitHub
parent e5334a64c4
commit 815ed10b5f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 147 additions and 202 deletions

View file

@ -27,16 +27,23 @@ fn test_hang_monitoring() {
ipc::channel().expect("ipc channel failure");
let (_sampler_sender, sampler_receiver) = ipc::channel().expect("ipc channel failure");
let background_hang_monitor_register = HangMonitorRegister::init(
let (background_hang_monitor_register, join_handle) = HangMonitorRegister::init(
background_hang_monitor_ipc_sender.clone(),
sampler_receiver,
true,
);
struct BHMExitSignal;
impl BackgroundHangMonitorExitSignal for BHMExitSignal {
fn signal_to_exit(&self) {}
}
let background_hang_monitor = background_hang_monitor_register.register_component(
MonitoredComponentId(TEST_PIPELINE_ID, MonitoredComponentType::Script),
Duration::from_millis(10),
Duration::from_millis(1000),
None,
Box::new(BHMExitSignal),
);
// Start an activity.
@ -119,6 +126,11 @@ fn test_hang_monitoring() {
// Still no new alerts because the hang monitor has shut-down already.
assert!(background_hang_monitor_receiver.try_recv().is_err());
// Join on the worker thread(channels are dropped above).
join_handle
.join()
.expect("Failed to join on the BHM worker thread");
}
#[test]
@ -131,16 +143,23 @@ fn test_hang_monitoring_unregister() {
ipc::channel().expect("ipc channel failure");
let (_sampler_sender, sampler_receiver) = ipc::channel().expect("ipc channel failure");
let background_hang_monitor_register = HangMonitorRegister::init(
let (background_hang_monitor_register, join_handle) = HangMonitorRegister::init(
background_hang_monitor_ipc_sender.clone(),
sampler_receiver,
true,
);
struct BHMExitSignal;
impl BackgroundHangMonitorExitSignal for BHMExitSignal {
fn signal_to_exit(&self) {}
}
let background_hang_monitor = background_hang_monitor_register.register_component(
MonitoredComponentId(TEST_PIPELINE_ID, MonitoredComponentType::Script),
Duration::from_millis(10),
Duration::from_millis(1000),
None,
Box::new(BHMExitSignal),
);
// Start an activity.
@ -155,6 +174,13 @@ fn test_hang_monitoring_unregister() {
// No new alert yet
assert!(background_hang_monitor_receiver.try_recv().is_err());
// Drop the channels and join on the worker thread.
drop(background_hang_monitor);
drop(background_hang_monitor_register);
join_handle
.join()
.expect("Failed to join on the BHM worker thread");
}
// Perform two certain steps in `test_hang_monitoring_exit_signal_inner` in
@ -218,15 +244,13 @@ fn test_hang_monitoring_exit_signal_inner(op_order: fn(&mut dyn FnMut(), &mut dy
}));
// Init a worker, without active monitoring.
let background_hang_monitor_register = HangMonitorRegister::init(
let (background_hang_monitor_register, join_handle) = HangMonitorRegister::init(
background_hang_monitor_ipc_sender.clone(),
control_receiver,
false,
);
let mut background_hang_monitor = None;
let (exit_sender, exit_receiver) = ipc::channel().expect("Failed to create IPC channel!");
let mut exit_sender = Some(exit_sender);
// `op_order` determines the order in which these two closures are
// executed.
@ -237,24 +261,26 @@ fn test_hang_monitoring_exit_signal_inner(op_order: fn(&mut dyn FnMut(), &mut dy
MonitoredComponentId(TEST_PIPELINE_ID, MonitoredComponentType::Script),
Duration::from_millis(10),
Duration::from_millis(1000),
Some(signal.take().unwrap()),
signal.take().unwrap(),
));
},
&mut || {
// Send the exit message.
control_sender
.send(BackgroundHangMonitorControlMsg::Exit(
exit_sender.take().unwrap(),
))
.send(BackgroundHangMonitorControlMsg::Exit)
.unwrap();
},
);
// Assert we receive a confirmation back.
assert!(exit_receiver.recv().is_ok());
// Assert we get the exit signal.
while !closing.load(Ordering::SeqCst) {
thread::sleep(Duration::from_millis(10));
}
// Drop the channels and join on the worker thread.
drop(background_hang_monitor);
drop(background_hang_monitor_register);
join_handle
.join()
.expect("Failed to join on the BHM worker thread");
}