mirror of
https://github.com/servo/servo.git
synced 2025-06-06 16:45:39 +00:00
Auto merge of #29443 - delan:fix-thread-crash-output, r=jdm
Improve winit/mach segfault output This patch improves the output that you see when the winit port segfaults, especially when the segfault happens on a layout or script thread, by making two changes to our crash handler and one change to mach. * we make the crash handler use stderr instead of stdout, because [std::io::stdout](https://doc.rust-lang.org/std/io/fn.stdout.html) allocates when first called, which will often cause a second segfault * we make the crash handler reraise the signal to terminate abnormally, allowing mach to distinguish it from exiting normally with the same status code as a signal number * we make mach print different messages for whether the process terminated by signal or exited normally You can try this yourself by running servo with the following patch: ```diff diff --git a/components/layout_thread/lib.rs b/components/layout_thread/lib.rs index ac1a7fe21e..77c4c9d087 100644 --- a/components/layout_thread/lib.rs +++ b/components/layout_thread/lib.rs @@ -589,6 +589,7 @@ impl LayoutThread { rw_data: &rw_data, possibly_locked_rw_data: &mut possibly_locked_rw_data, }; + unsafe { std::ptr::read::<usize>(std::ptr::null()); } while self.handle_request(&mut rw_data) { // Loop indefinitely. } ``` Before: ``` $ ./mach run -d about:blank [should print “Stack trace for "Layout(1,1)"” here, but segfaults] Servo exited with return value 11 ``` After: ``` $ ./mach run -d about:blank Caught signal 11 in thread "Layout(1,1)" [should ideally print backtrace here, but segfaults] Servo was terminated by signal 11 ``` --- <!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `___` with appropriate data: --> - [x] `./mach build -d` does not report any errors - [x] `./mach test-tidy` does not report any errors - [x] These changes fix #29442 <!-- Either: --> - [ ] There are tests for these changes OR - [x] These changes do not require tests because it would be impractical to test them (r? @jdm, @SimonSapin based on git log for crash handler)
This commit is contained in:
commit
05c431f626
2 changed files with 34 additions and 9 deletions
|
@ -8,24 +8,46 @@ pub fn install() {}
|
|||
#[cfg(any(target_os = "macos", target_os = "linux"))]
|
||||
pub fn install() {
|
||||
use crate::backtrace;
|
||||
use libc::_exit;
|
||||
use sig::ffi::Sig;
|
||||
use std::{io::Write, sync::atomic, thread};
|
||||
|
||||
extern "C" fn handler(sig: i32) {
|
||||
// Only print crash message and backtrace the first time, to avoid
|
||||
// infinite recursion if the printing causes another signal.
|
||||
static BEEN_HERE_BEFORE: atomic::AtomicBool = atomic::AtomicBool::new(false);
|
||||
if !BEEN_HERE_BEFORE.swap(true, atomic::Ordering::SeqCst) {
|
||||
let stdout = std::io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
let _ = write!(&mut stdout, "Stack trace");
|
||||
// stderr is unbuffered, so we won’t lose output if we crash later
|
||||
// in this handler, and the std::io::stderr() call never allocates.
|
||||
// std::io::stdout() allocates the first time it’s called, which in
|
||||
// practice will often segfault (see below).
|
||||
let stderr = std::io::stderr();
|
||||
let mut stderr = stderr.lock();
|
||||
let _ = write!(&mut stderr, "Caught signal {sig}");
|
||||
if let Some(name) = thread::current().name() {
|
||||
let _ = write!(&mut stdout, " for thread \"{}\"", name);
|
||||
let _ = write!(&mut stderr, " in thread \"{}\"", name);
|
||||
}
|
||||
let _ = write!(&mut stdout, "\n");
|
||||
let _ = backtrace::print(&mut stdout);
|
||||
let _ = write!(&mut stderr, "\n");
|
||||
|
||||
// This call always allocates, which in practice will segfault if
|
||||
// we’re handling a non-main-thread (e.g. layout) segfault. Strictly
|
||||
// speaking in POSIX terms, this is also undefined behaviour.
|
||||
let _ = backtrace::print(&mut stderr);
|
||||
}
|
||||
|
||||
// Outside the BEEN_HERE_BEFORE check, we must only call functions we
|
||||
// know to be “async-signal-safe”, which includes sigaction(), raise(),
|
||||
// and _exit(), but generally doesn’t include anything that allocates.
|
||||
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03_03
|
||||
unsafe {
|
||||
_exit(sig);
|
||||
// Reset the signal to the default action, and reraise the signal.
|
||||
// Unlike libc::_exit(sig), which terminates the process normally,
|
||||
// this terminates abnormally just like an uncaught signal, allowing
|
||||
// mach (or your shell) to distinguish it from an ordinary exit, and
|
||||
// allows your kernel to make a core dump if configured to do so.
|
||||
let mut action: libc::sigaction = std::mem::zeroed();
|
||||
action.sa_sigaction = libc::SIG_DFL;
|
||||
libc::sigaction(sig, &action, std::ptr::null_mut());
|
||||
libc::raise(sig);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -169,7 +169,10 @@ class PostBuildCommands(CommandBase):
|
|||
try:
|
||||
check_call(args, env=env)
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("Servo exited with return value %d" % e.returncode)
|
||||
if e.returncode < 0:
|
||||
print(f"Servo was terminated by signal {-e.returncode}")
|
||||
else:
|
||||
print(f"Servo exited with non-zero status {e.returncode}")
|
||||
return e.returncode
|
||||
except OSError as e:
|
||||
if e.errno == 2:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue