From cd93841ba188ac89fc18ef597c07d0f135615a7d Mon Sep 17 00:00:00 2001 From: shanehandley Date: Mon, 27 Jan 2025 23:18:03 +1100 Subject: [PATCH] Remove UTF-8 BOM before parsing JSON (#35175) Signed-off-by: Shane Handley --- components/script/body.rs | 28 ++++++++++++++++--- components/script/dom/xmlhttprequest.rs | 17 ++--------- .../meta/fetch/api/response/json.any.js.ini | 8 ------ 3 files changed, 26 insertions(+), 27 deletions(-) delete mode 100644 tests/wpt/meta/fetch/api/response/json.any.js.ini diff --git a/components/script/body.rs b/components/script/body.rs index e8e72913df9..8546b3f9cd7 100644 --- a/components/script/body.rs +++ b/components/script/body.rs @@ -3,9 +3,9 @@ * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use std::rc::Rc; -use std::{ptr, str}; +use std::{ptr, slice, str}; -use encoding_rs::UTF_8; +use encoding_rs::{Encoding, UTF_8}; use ipc_channel::ipc::{self, IpcReceiver, IpcSender}; use ipc_channel::router::ROUTER; use js::jsapi::{Heap, JSObject, JS_ClearPendingException, Value as JSValue}; @@ -821,8 +821,11 @@ fn run_text_data_algorithm(bytes: Vec) -> Fallible { #[allow(unsafe_code)] fn run_json_data_algorithm(cx: JSContext, bytes: Vec) -> Fallible { - let json_text = String::from_utf8_lossy(&bytes); - let json_text: Vec = json_text.encode_utf16().collect(); + // The JSON spec allows implementations to either ignore UTF-8 BOM or treat it as an error. + // `JS_ParseJSON` treats this as an error, so it is necessary for us to strip it if present. + // + // https://datatracker.ietf.org/doc/html/rfc8259#section-8.1 + let json_text = decode_to_utf16_with_bom_removal(&bytes, UTF_8); rooted!(in(*cx) let mut rval = UndefinedValue()); unsafe { if !JS_ParseJSON( @@ -908,6 +911,23 @@ pub(crate) fn run_array_buffer_data_algorithm( Ok(FetchedData::ArrayBuffer(rooted_heap)) } +#[allow(unsafe_code)] +pub(crate) fn decode_to_utf16_with_bom_removal( + bytes: &[u8], + encoding: &'static Encoding, +) -> Vec { + let mut decoder = encoding.new_decoder_with_bom_removal(); + let capacity = decoder + .max_utf16_buffer_length(bytes.len()) + .expect("Overflow"); + let mut utf16 = Vec::with_capacity(capacity); + let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) }; + let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, true); + assert_eq!(read, bytes.len()); + unsafe { utf16.set_len(written) } + utf16 +} + /// pub(crate) trait BodyMixin { /// diff --git a/components/script/dom/xmlhttprequest.rs b/components/script/dom/xmlhttprequest.rs index ff249653691..e0eca131d65 100644 --- a/components/script/dom/xmlhttprequest.rs +++ b/components/script/dom/xmlhttprequest.rs @@ -4,11 +4,11 @@ use std::borrow::ToOwned; use std::cell::Cell; +use std::cmp; use std::default::Default; use std::str::{self, FromStr}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; -use std::{cmp, slice}; use dom_struct::dom_struct; use encoding_rs::{Encoding, UTF_8}; @@ -38,7 +38,7 @@ use servo_atoms::Atom; use servo_url::ServoUrl; use url::Position; -use crate::body::{BodySource, Extractable, ExtractedBody}; +use crate::body::{decode_to_utf16_with_bom_removal, BodySource, Extractable, ExtractedBody}; use crate::document_loader::DocumentLoader; use crate::dom::bindings::buffer_source::HeapBufferSource; use crate::dom::bindings::cell::DomRefCell; @@ -1432,19 +1432,6 @@ impl XMLHttpRequest { return rval.set(NullValue()); } // Step 4 - fn decode_to_utf16_with_bom_removal(bytes: &[u8], encoding: &'static Encoding) -> Vec { - let mut decoder = encoding.new_decoder_with_bom_removal(); - let capacity = decoder - .max_utf16_buffer_length(bytes.len()) - .expect("Overflow"); - let mut utf16 = Vec::with_capacity(capacity); - let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) }; - let last = true; - let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, last); - assert_eq!(read, bytes.len()); - unsafe { utf16.set_len(written) } - utf16 - } // https://xhr.spec.whatwg.org/#json-response refers to // https://infra.spec.whatwg.org/#parse-json-from-bytes which refers to // https://encoding.spec.whatwg.org/#utf-8-decode which means diff --git a/tests/wpt/meta/fetch/api/response/json.any.js.ini b/tests/wpt/meta/fetch/api/response/json.any.js.ini deleted file mode 100644 index 180ef859b33..00000000000 --- a/tests/wpt/meta/fetch/api/response/json.any.js.ini +++ /dev/null @@ -1,8 +0,0 @@ -[json.any.worker.html] - [Ensure the correct JSON parser is used] - expected: FAIL - - -[json.any.html] - [Ensure the correct JSON parser is used] - expected: FAIL