Add a -Z replace-surrogates command-line option.

See #6564.
This commit is contained in:
Simon Sapin 2015-07-13 20:36:08 +02:00
parent 12195a5c4a
commit 90dbd86ed7
3 changed files with 36 additions and 2 deletions

View file

@ -352,10 +352,33 @@ pub fn jsstring_to_str(cx: *mut JSContext, s: *mut JSString) -> DOMString {
JS_GetTwoByteStringCharsAndLength(cx, ptr::null(), s, &mut length)
};
assert!(!chars.is_null());
let char_vec = unsafe {
let potentially_ill_formed_utf16 = unsafe {
slice::from_raw_parts(chars as *const u16, length as usize)
};
String::from_utf16(char_vec).unwrap()
let mut s = String::with_capacity(length as usize);
for item in ::rustc_unicode::str::utf16_items(potentially_ill_formed_utf16) {
use ::rustc_unicode::str::Utf16Item::*;
match item {
ScalarValue(c) => s.push(c),
LoneSurrogate(_) => {
// FIXME: Add more info like document URL in the message?
// Shared diagnostic text for the unpaired-surrogate case. Takes no arguments
// and expands to a single string literal (the trailing backslashes continue
// the literal across source lines). It is invoked by both the `warn!` branch
// and the `panic!` branch below, so the two paths report the same message.
// NOTE(review): presumably a macro rather than a `const` so that the text can
// be handed to those macros as a literal format string — confirm.
macro_rules! message {
() => {
"Found an unpaired surrogate in a DOM string. \
If you see this in real web content, \
please comment on https://github.com/servo/servo/issues/6564"
}
}
if ::util::opts::get().replace_surrogates {
warn!(message!());
s.push('\u{FFFD}');
} else {
panic!(message!());
}
}
}
}
s
}
}

View file

@ -21,6 +21,7 @@
#![feature(rc_unique)]
#![feature(slice_chars)]
#![feature(str_utf16)]
#![feature(unicode)]
#![feature(vec_push_all)]
#![deny(unsafe_code)]
@ -49,6 +50,7 @@ extern crate msg;
extern crate net_traits;
extern crate num;
extern crate rustc_serialize;
extern crate rustc_unicode;
extern crate time;
extern crate canvas;
extern crate canvas_traits;