mirror of
https://github.com/servo/servo.git
synced 2025-08-04 13:10:20 +01:00
Remove UTF-8 BOM before parsing JSON (#35175)
Signed-off-by: Shane Handley <shanehandley@fastmail.com>
This commit is contained in:
parent
5a0a60efc1
commit
cd93841ba1
3 changed files with 26 additions and 27 deletions
|
@ -3,9 +3,9 @@
|
||||||
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
use std::{ptr, str};
|
use std::{ptr, slice, str};
|
||||||
|
|
||||||
use encoding_rs::UTF_8;
|
use encoding_rs::{Encoding, UTF_8};
|
||||||
use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
|
use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
|
||||||
use ipc_channel::router::ROUTER;
|
use ipc_channel::router::ROUTER;
|
||||||
use js::jsapi::{Heap, JSObject, JS_ClearPendingException, Value as JSValue};
|
use js::jsapi::{Heap, JSObject, JS_ClearPendingException, Value as JSValue};
|
||||||
|
@ -821,8 +821,11 @@ fn run_text_data_algorithm(bytes: Vec<u8>) -> Fallible<FetchedData> {
|
||||||
|
|
||||||
#[allow(unsafe_code)]
|
#[allow(unsafe_code)]
|
||||||
fn run_json_data_algorithm(cx: JSContext, bytes: Vec<u8>) -> Fallible<FetchedData> {
|
fn run_json_data_algorithm(cx: JSContext, bytes: Vec<u8>) -> Fallible<FetchedData> {
|
||||||
let json_text = String::from_utf8_lossy(&bytes);
|
// The JSON spec allows implementations to either ignore UTF-8 BOM or treat it as an error.
|
||||||
let json_text: Vec<u16> = json_text.encode_utf16().collect();
|
// `JS_ParseJSON` treats this as an error, so it is necessary for us to strip it if present.
|
||||||
|
//
|
||||||
|
// https://datatracker.ietf.org/doc/html/rfc8259#section-8.1
|
||||||
|
let json_text = decode_to_utf16_with_bom_removal(&bytes, UTF_8);
|
||||||
rooted!(in(*cx) let mut rval = UndefinedValue());
|
rooted!(in(*cx) let mut rval = UndefinedValue());
|
||||||
unsafe {
|
unsafe {
|
||||||
if !JS_ParseJSON(
|
if !JS_ParseJSON(
|
||||||
|
@ -908,6 +911,23 @@ pub(crate) fn run_array_buffer_data_algorithm(
|
||||||
Ok(FetchedData::ArrayBuffer(rooted_heap))
|
Ok(FetchedData::ArrayBuffer(rooted_heap))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes `bytes` with `encoding` into UTF-16 code units, using a decoder
/// created with BOM removal.
///
/// The whole of `bytes` is decoded in a single call with `last = true`, and
/// the produced code units are returned as an owned `Vec<u16>`.
///
/// # Panics
///
/// Panics with "Overflow" if the decoder cannot report a maximum UTF-16
/// buffer length for `bytes.len()`, and panics if the decoder reports having
/// read fewer bytes than `bytes.len()`.
// `unsafe_code` is allowed because the output buffer is written through a raw
// slice over the vector's spare capacity and its length is set afterwards.
#[allow(unsafe_code)]
|
||||||
|
pub(crate) fn decode_to_utf16_with_bom_removal(
|
||||||
|
bytes: &[u8],
|
||||||
|
encoding: &'static Encoding,
|
||||||
|
) -> Vec<u16> {
|
||||||
|
let mut decoder = encoding.new_decoder_with_bom_removal();
|
||||||
|
// Ask the decoder for an output size that is large enough for `bytes.len()` input bytes.
let capacity = decoder
|
||||||
|
.max_utf16_buffer_length(bytes.len())
|
||||||
|
.expect("Overflow");
|
||||||
|
// Allocated but left at length 0; the decoder fills it through `extra` below.
let mut utf16 = Vec::with_capacity(capacity);
|
||||||
|
// SAFETY: `utf16` was allocated with room for `capacity` elements, so the pointer is valid
// for `capacity` `u16` writes.
// NOTE(review): the memory is still uninitialized when this `&mut [u16]` is formed, which
// appears to conflict with the `slice::from_raw_parts_mut` safety contract — confirm, or
// switch to a zero-initialized buffer.
let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) };
|
||||||
|
// `true` marks `bytes` as the final (here: only) chunk of input.
let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, true);
|
||||||
|
// The buffer was sized from the decoder's own bound, so all input is expected to be consumed.
assert_eq!(read, bytes.len());
|
||||||
|
// SAFETY: the decoder reported writing `written` code units into `extra`, which is exactly
// `capacity` long, so `written <= capacity` and those elements are initialized.
unsafe { utf16.set_len(written) }
|
||||||
|
utf16
|
||||||
|
}
|
||||||
|
|
||||||
/// <https://fetch.spec.whatwg.org/#body>
|
/// <https://fetch.spec.whatwg.org/#body>
|
||||||
pub(crate) trait BodyMixin {
|
pub(crate) trait BodyMixin {
|
||||||
/// <https://fetch.spec.whatwg.org/#concept-body-disturbed>
|
/// <https://fetch.spec.whatwg.org/#concept-body-disturbed>
|
||||||
|
|
|
@ -4,11 +4,11 @@
|
||||||
|
|
||||||
use std::borrow::ToOwned;
|
use std::borrow::ToOwned;
|
||||||
use std::cell::Cell;
|
use std::cell::Cell;
|
||||||
|
use std::cmp;
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
use std::str::{self, FromStr};
|
use std::str::{self, FromStr};
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
use std::time::{Duration, Instant};
|
use std::time::{Duration, Instant};
|
||||||
use std::{cmp, slice};
|
|
||||||
|
|
||||||
use dom_struct::dom_struct;
|
use dom_struct::dom_struct;
|
||||||
use encoding_rs::{Encoding, UTF_8};
|
use encoding_rs::{Encoding, UTF_8};
|
||||||
|
@ -38,7 +38,7 @@ use servo_atoms::Atom;
|
||||||
use servo_url::ServoUrl;
|
use servo_url::ServoUrl;
|
||||||
use url::Position;
|
use url::Position;
|
||||||
|
|
||||||
use crate::body::{BodySource, Extractable, ExtractedBody};
|
use crate::body::{decode_to_utf16_with_bom_removal, BodySource, Extractable, ExtractedBody};
|
||||||
use crate::document_loader::DocumentLoader;
|
use crate::document_loader::DocumentLoader;
|
||||||
use crate::dom::bindings::buffer_source::HeapBufferSource;
|
use crate::dom::bindings::buffer_source::HeapBufferSource;
|
||||||
use crate::dom::bindings::cell::DomRefCell;
|
use crate::dom::bindings::cell::DomRefCell;
|
||||||
|
@ -1432,19 +1432,6 @@ impl XMLHttpRequest {
|
||||||
return rval.set(NullValue());
|
return rval.set(NullValue());
|
||||||
}
|
}
|
||||||
// Step 4
|
// Step 4
|
||||||
// Local helper: decodes `bytes` with `encoding` into UTF-16 code units, using a decoder
// created with BOM removal, and returns them as an owned `Vec<u16>`.
// Panics with "Overflow" if the decoder cannot report a maximum output length, and panics
// if fewer than `bytes.len()` bytes were read.
fn decode_to_utf16_with_bom_removal(bytes: &[u8], encoding: &'static Encoding) -> Vec<u16> {
|
|
||||||
let mut decoder = encoding.new_decoder_with_bom_removal();
|
|
||||||
// Ask the decoder for an output size that is large enough for `bytes.len()` input bytes.
let capacity = decoder
|
|
||||||
.max_utf16_buffer_length(bytes.len())
|
|
||||||
.expect("Overflow");
|
|
||||||
// Allocated but left at length 0; the decoder fills it through `extra` below.
let mut utf16 = Vec::with_capacity(capacity);
|
|
||||||
// SAFETY: `utf16` was allocated with room for `capacity` elements, so the pointer is valid
// for `capacity` `u16` writes.
// NOTE(review): the memory is uninitialized when this `&mut [u16]` is formed — confirm this
// is acceptable under the `slice::from_raw_parts_mut` safety contract.
let extra = unsafe { slice::from_raw_parts_mut(utf16.as_mut_ptr(), capacity) };
|
|
||||||
// `bytes` is the complete input, so this single call is also the last one.
let last = true;
|
|
||||||
let (_, read, written, _) = decoder.decode_to_utf16(bytes, extra, last);
|
|
||||||
// The buffer was sized from the decoder's own bound, so all input is expected to be consumed.
assert_eq!(read, bytes.len());
|
|
||||||
// SAFETY: the decoder reported writing `written` code units into `extra`, which is exactly
// `capacity` long, so `written <= capacity` and those elements are initialized.
unsafe { utf16.set_len(written) }
|
|
||||||
utf16
|
|
||||||
}
|
|
||||||
// https://xhr.spec.whatwg.org/#json-response refers to
|
// https://xhr.spec.whatwg.org/#json-response refers to
|
||||||
// https://infra.spec.whatwg.org/#parse-json-from-bytes which refers to
|
// https://infra.spec.whatwg.org/#parse-json-from-bytes which refers to
|
||||||
// https://encoding.spec.whatwg.org/#utf-8-decode which means
|
// https://encoding.spec.whatwg.org/#utf-8-decode which means
|
||||||
|
|
|
@ -1,8 +0,0 @@
|
||||||
[json.any.worker.html]
|
|
||||||
[Ensure the correct JSON parser is used]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
|
|
||||||
[json.any.html]
|
|
||||||
[Ensure the correct JSON parser is used]
|
|
||||||
expected: FAIL
|
|
Loading…
Add table
Add a link
Reference in a new issue