Auto merge of #29443 - delan:fix-thread-crash-output, r=jdm

Improve winit/mach segfault output

This patch improves the output that you see when the winit port segfaults, especially when the segfault happens on a layout or script thread, by making two changes to our crash handler and one change to mach.

* we make the crash handler use stderr instead of stdout, because [std::io::stdout](https://doc.rust-lang.org/std/io/fn.stdout.html) allocates when first called, which will often cause a second segfault
* we make the crash handler reraise the signal to terminate abnormally, allowing mach to distinguish it from exiting normally with the same status code as a signal number
* we make mach print different messages for whether the process terminated by signal or exited normally

You can try this yourself by running servo with the following patch:

```diff
diff --git a/components/layout_thread/lib.rs b/components/layout_thread/lib.rs
index ac1a7fe21e..77c4c9d087 100644
--- a/components/layout_thread/lib.rs
+++ b/components/layout_thread/lib.rs
@@ -589,6 +589,7 @@ impl LayoutThread {
             rw_data: &rw_data,
             possibly_locked_rw_data: &mut possibly_locked_rw_data,
         };
+        unsafe { std::ptr::read::<usize>(std::ptr::null()); }
         while self.handle_request(&mut rw_data) {
             // Loop indefinitely.
         }
```

Before:

```
$ ./mach run -d about:blank
[should print “Stack trace for "Layout(1,1)"” here, but segfaults]
Servo exited with return value 11
```

After:

```
$ ./mach run -d about:blank
Caught signal 11 in thread "Layout(1,1)"
[should ideally print backtrace here, but segfaults]
Servo was terminated by signal 11
```

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `___` with appropriate data: -->
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes fix #29442

<!-- Either: -->
- [ ] There are tests for these changes OR
- [x] These changes do not require tests because it would be impractical to test them

(r? @jdm, @SimonSapin based on git log for crash handler)
This commit is contained in:
bors-servo 2023-02-28 15:29:50 +01:00 committed by GitHub
commit 05c431f626
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 9 deletions

View file

@ -8,24 +8,46 @@ pub fn install() {}
#[cfg(any(target_os = "macos", target_os = "linux"))]
pub fn install() {
use crate::backtrace;
use libc::_exit;
use sig::ffi::Sig;
use std::{io::Write, sync::atomic, thread};
extern "C" fn handler(sig: i32) {
// Only print crash message and backtrace the first time, to avoid
// infinite recursion if the printing causes another signal.
static BEEN_HERE_BEFORE: atomic::AtomicBool = atomic::AtomicBool::new(false);
if !BEEN_HERE_BEFORE.swap(true, atomic::Ordering::SeqCst) {
let stdout = std::io::stdout();
let mut stdout = stdout.lock();
let _ = write!(&mut stdout, "Stack trace");
// stderr is unbuffered, so we wont lose output if we crash later
// in this handler, and the std::io::stderr() call never allocates.
// std::io::stdout() allocates the first time its called, which in
// practice will often segfault (see below).
let stderr = std::io::stderr();
let mut stderr = stderr.lock();
let _ = write!(&mut stderr, "Caught signal {sig}");
if let Some(name) = thread::current().name() {
let _ = write!(&mut stdout, " for thread \"{}\"", name);
let _ = write!(&mut stderr, " in thread \"{}\"", name);
}
let _ = write!(&mut stdout, "\n");
let _ = backtrace::print(&mut stdout);
let _ = write!(&mut stderr, "\n");
// This call always allocates, which in practice will segfault if
// were handling a non-main-thread (e.g. layout) segfault. Strictly
// speaking in POSIX terms, this is also undefined behaviour.
let _ = backtrace::print(&mut stderr);
}
// Outside the BEEN_HERE_BEFORE check, we must only call functions we
// know to be “async-signal-safe”, which includes sigaction(), raise(),
// and _exit(), but generally doesnt include anything that allocates.
// https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03_03
unsafe {
_exit(sig);
// Reset the signal to the default action, and reraise the signal.
// Unlike libc::_exit(sig), which terminates the process normally,
// this terminates abnormally just like an uncaught signal, allowing
// mach (or your shell) to distinguish it from an ordinary exit, and
// allows your kernel to make a core dump if configured to do so.
let mut action: libc::sigaction = std::mem::zeroed();
action.sa_sigaction = libc::SIG_DFL;
libc::sigaction(sig, &action, std::ptr::null_mut());
libc::raise(sig);
}
}

View file

@ -169,7 +169,10 @@ class PostBuildCommands(CommandBase):
try:
check_call(args, env=env)
except subprocess.CalledProcessError as e:
print("Servo exited with return value %d" % e.returncode)
if e.returncode < 0:
print(f"Servo was terminated by signal {-e.returncode}")
else:
print(f"Servo exited with non-zero status {e.returncode}")
return e.returncode
except OSError as e:
if e.errno == 2: