mirror of
https://github.com/servo/servo.git
synced 2025-06-06 16:45:39 +00:00
Remove UTF-8 BOM before parsing JSON (#35175)
Signed-off-by: Shane Handley <shanehandley@fastmail.com>
This commit is contained in:
parent
5a0a60efc1
commit
cd93841ba1
3 changed files with 26 additions and 27 deletions
|
@ -3,9 +3,9 @@
|
|||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||
|
||||
use std::rc::Rc;
|
||||
use std::{ptr, str};
|
||||
use std::{ptr, slice, str};
|
||||
|
||||
use encoding_rs::UTF_8;
|
||||
use encoding_rs::{Encoding, UTF_8};
|
||||
use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
|
||||
use ipc_channel::router::ROUTER;
|
||||
use js::jsapi::{Heap, JSObject, JS_ClearPendingException, Value as JSValue};
|
||||
|
@ -821,8 +821,11 @@ fn run_text_data_algorithm(bytes: Vec<u8>) -> Fallible<FetchedData> {
|
|||
|
||||
#[allow(unsafe_code)]
|
||||
fn run_json_data_algorithm(cx: JSContext, bytes: Vec<u8>) -> Fallible<FetchedData> {
|
||||
let json_text = String::from_utf8_lossy(&bytes);
|
||||
let json_text: Vec<u16> = json_text.encode_utf16().collect();
|
||||
// The JSON spec allows implementations to either ignore a UTF-8 BOM or treat it as an error.
|
||||
// `JS_ParseJSON` treats this as an error, so it is necessary for us to strip it if present.
|
||||
//
|
||||
// https://datatracker.ietf.org/doc/html/rfc8259#section-8.1
|
||||
let json_text = decode_to_utf16_with_bom_removal(&bytes, UTF_8);
|
||||
rooted!(in(*cx) let mut rval = UndefinedValue());
|
||||
unsafe {
|
||||
if !JS_ParseJSON(
|
||||
|
@ -908,6 +911,23 @@ pub(crate) fn run_array_buffer_data_algorithm(
|
|||
Ok(FetchedData::ArrayBuffer(rooted_heap))
|
||||
}
|
||||
|
||||
#[allow(unsafe_code)]
|
||||
pub(crate) fn decode_to_utf16_with_bom_removal(
|
||||
bytes: &[u8],
|
||||
encoding: &'static Encoding,
|
||||
) -> Vec<u16> {
|
||||
let mut decoder = encoding.new_decoder_with_bom_removal();
|
||||
let capacity = decoder
|
||||
.max_utf16_buffer_length(bytes.len())
|
||||
.expect("Overflow");
|
||||
let mut utf16 = Vec::with_capacity(capacity);
|
||||
let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) };
|
||||
let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, true);
|
||||
assert_eq!(read, bytes.len());
|
||||
unsafe { utf16.set_len(written) }
|
||||
utf16
|
||||
}
|
||||
|
||||
/// <https://fetch.spec.whatwg.org/#body>
|
||||
pub(crate) trait BodyMixin {
|
||||
/// <https://fetch.spec.whatwg.org/#concept-body-disturbed>
|
||||
|
|
|
@ -4,11 +4,11 @@
|
|||
|
||||
use std::borrow::ToOwned;
|
||||
use std::cell::Cell;
|
||||
use std::cmp;
|
||||
use std::default::Default;
|
||||
use std::str::{self, FromStr};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::{Duration, Instant};
|
||||
use std::{cmp, slice};
|
||||
|
||||
use dom_struct::dom_struct;
|
||||
use encoding_rs::{Encoding, UTF_8};
|
||||
|
@ -38,7 +38,7 @@ use servo_atoms::Atom;
|
|||
use servo_url::ServoUrl;
|
||||
use url::Position;
|
||||
|
||||
use crate::body::{BodySource, Extractable, ExtractedBody};
|
||||
use crate::body::{decode_to_utf16_with_bom_removal, BodySource, Extractable, ExtractedBody};
|
||||
use crate::document_loader::DocumentLoader;
|
||||
use crate::dom::bindings::buffer_source::HeapBufferSource;
|
||||
use crate::dom::bindings::cell::DomRefCell;
|
||||
|
@ -1432,19 +1432,6 @@ impl XMLHttpRequest {
|
|||
return rval.set(NullValue());
|
||||
}
|
||||
// Step 4
|
||||
fn decode_to_utf16_with_bom_removal(bytes: &[u8], encoding: &'static Encoding) -> Vec<u16> {
|
||||
let mut decoder = encoding.new_decoder_with_bom_removal();
|
||||
let capacity = decoder
|
||||
.max_utf16_buffer_length(bytes.len())
|
||||
.expect("Overflow");
|
||||
let mut utf16 = Vec::with_capacity(capacity);
|
||||
let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) };
|
||||
let last = true;
|
||||
let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, last);
|
||||
assert_eq!(read, bytes.len());
|
||||
unsafe { utf16.set_len(written) }
|
||||
utf16
|
||||
}
|
||||
// https://xhr.spec.whatwg.org/#json-response refers to
|
||||
// https://infra.spec.whatwg.org/#parse-json-from-bytes which refers to
|
||||
// https://encoding.spec.whatwg.org/#utf-8-decode which means
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue