mirror of
https://github.com/servo/servo.git
synced 2025-08-04 05:00:08 +01:00
Auto merge of #20431 - talklittle:issues-13234-5600-squashed, r=jdm
TextDecoder: streaming decode, ignoreBOM

<!-- Please describe your changes on the following line: -->
Implement streaming decode and ignoreBOM flag for TextDecoder.

https://encoding.spec.whatwg.org/#dom-textdecoder-decode
https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom

---
<!-- Thank you for contributing to Servo! Please replace each `[ ]` by `[X]` when the step is complete, and replace `__` with appropriate data: -->
- [x] `./mach build -d` does not report any errors
- [x] `./mach test-tidy` does not report any errors
- [x] These changes fix #13234 (github issue number if applicable).
- [x] These changes fix #5600 (github issue number if applicable).

<!-- Either: -->
- [ ] There are tests for these changes OR
- [x] These changes do not require tests because the wpt tests are used for testing:
  * /encoding/textdecoder-fatal-streaming.html
  * /encoding/textdecoder-streaming.html
  * /encoding/textdecoder-ignorebom.html

<!-- Also, please make sure that "Allow edits from maintainers" checkbox is checked, so that we can help you if you get stuck somewhere along the way.-->

<!-- Pull requests that do not address these steps are welcome, but they will require additional verification as part of the review process. -->

<!-- Reviewable:start -->
---
This change is [<img src="https://reviewable.io/review_button.svg" height="34" align="absmiddle" alt="Reviewable"/>](https://reviewable.io/reviews/servo/servo/20431)
<!-- Reviewable:end -->
This commit is contained in:
commit
1981efcc35
8 changed files with 81 additions and 119 deletions
|
@ -47,7 +47,7 @@ use dom::bindings::root::{Dom, DomRoot};
|
||||||
use dom::bindings::str::{DOMString, USVString};
|
use dom::bindings::str::{DOMString, USVString};
|
||||||
use dom::bindings::utils::WindowProxyHandler;
|
use dom::bindings::utils::WindowProxyHandler;
|
||||||
use dom::document::PendingRestyle;
|
use dom::document::PendingRestyle;
|
||||||
use encoding_rs::Encoding;
|
use encoding_rs::{Decoder, Encoding};
|
||||||
use euclid::{Transform2D, Transform3D, Point2D, Vector2D, Rect, TypedSize2D, TypedScale};
|
use euclid::{Transform2D, Transform3D, Point2D, Vector2D, Rect, TypedSize2D, TypedScale};
|
||||||
use euclid::Length as EuclidLength;
|
use euclid::Length as EuclidLength;
|
||||||
use html5ever::{Prefix, LocalName, Namespace, QualName};
|
use html5ever::{Prefix, LocalName, Namespace, QualName};
|
||||||
|
@ -127,6 +127,9 @@ unsafe_no_jsmanaged_fields!(CSSError);
|
||||||
|
|
||||||
unsafe_no_jsmanaged_fields!(&'static Encoding);
|
unsafe_no_jsmanaged_fields!(&'static Encoding);
|
||||||
|
|
||||||
|
unsafe_no_jsmanaged_fields!(RefCell<Decoder>);
|
||||||
|
unsafe_no_jsmanaged_fields!(RefCell<Vec<u8>>);
|
||||||
|
|
||||||
unsafe_no_jsmanaged_fields!(Reflector);
|
unsafe_no_jsmanaged_fields!(Reflector);
|
||||||
|
|
||||||
unsafe_no_jsmanaged_fields!(Duration);
|
unsafe_no_jsmanaged_fields!(Duration);
|
||||||
|
|
|
@ -11,22 +11,34 @@ use dom::bindings::root::DomRoot;
|
||||||
use dom::bindings::str::{DOMString, USVString};
|
use dom::bindings::str::{DOMString, USVString};
|
||||||
use dom::globalscope::GlobalScope;
|
use dom::globalscope::GlobalScope;
|
||||||
use dom_struct::dom_struct;
|
use dom_struct::dom_struct;
|
||||||
use encoding_rs::Encoding;
|
use encoding_rs::{Decoder, DecoderResult, Encoding};
|
||||||
use std::borrow::ToOwned;
|
use std::borrow::ToOwned;
|
||||||
|
use std::cell::{Cell, RefCell};
|
||||||
|
|
||||||
#[dom_struct]
|
#[dom_struct]
|
||||||
pub struct TextDecoder {
|
pub struct TextDecoder {
|
||||||
reflector_: Reflector,
|
reflector_: Reflector,
|
||||||
encoding: &'static Encoding,
|
encoding: &'static Encoding,
|
||||||
fatal: bool,
|
fatal: bool,
|
||||||
|
ignoreBOM: bool,
|
||||||
|
#[ignore_malloc_size_of = "defined in encoding_rs"]
|
||||||
|
decoder: RefCell<Decoder>,
|
||||||
|
in_stream: RefCell<Vec<u8>>,
|
||||||
|
do_not_flush: Cell<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TextDecoder {
|
impl TextDecoder {
|
||||||
fn new_inherited(encoding: &'static Encoding, fatal: bool) -> TextDecoder {
|
fn new_inherited(encoding: &'static Encoding, fatal: bool, ignoreBOM: bool) -> TextDecoder {
|
||||||
TextDecoder {
|
TextDecoder {
|
||||||
reflector_: Reflector::new(),
|
reflector_: Reflector::new(),
|
||||||
encoding: encoding,
|
encoding: encoding,
|
||||||
fatal: fatal,
|
fatal: fatal,
|
||||||
|
ignoreBOM: ignoreBOM,
|
||||||
|
decoder: RefCell::new(
|
||||||
|
if ignoreBOM { encoding.new_decoder() } else { encoding.new_decoder_without_bom_handling() }
|
||||||
|
),
|
||||||
|
in_stream: RefCell::new(Vec::new()),
|
||||||
|
do_not_flush: Cell::new(false),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,8 +46,9 @@ impl TextDecoder {
|
||||||
Err(Error::Range("The given encoding is not supported.".to_owned()))
|
Err(Error::Range("The given encoding is not supported.".to_owned()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(global: &GlobalScope, encoding: &'static Encoding, fatal: bool) -> DomRoot<TextDecoder> {
|
pub fn new(global: &GlobalScope, encoding: &'static Encoding, fatal: bool, ignoreBOM: bool)
|
||||||
reflect_dom_object(Box::new(TextDecoder::new_inherited(encoding, fatal)),
|
-> DomRoot<TextDecoder> {
|
||||||
|
reflect_dom_object(Box::new(TextDecoder::new_inherited(encoding, fatal, ignoreBOM)),
|
||||||
global,
|
global,
|
||||||
TextDecoderBinding::Wrap)
|
TextDecoderBinding::Wrap)
|
||||||
}
|
}
|
||||||
|
@ -49,7 +62,7 @@ impl TextDecoder {
|
||||||
None => return TextDecoder::make_range_error(),
|
None => return TextDecoder::make_range_error(),
|
||||||
Some(enc) => enc
|
Some(enc) => enc
|
||||||
};
|
};
|
||||||
Ok(TextDecoder::new(global, encoding, options.fatal))
|
Ok(TextDecoder::new(global, encoding, options.fatal, options.ignoreBOM))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -65,30 +78,68 @@ impl TextDecoderMethods for TextDecoder {
|
||||||
self.fatal
|
self.fatal
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom
|
||||||
|
fn IgnoreBOM(&self) -> bool {
|
||||||
|
self.ignoreBOM
|
||||||
|
}
|
||||||
|
|
||||||
// https://encoding.spec.whatwg.org/#dom-textdecoder-decode
|
// https://encoding.spec.whatwg.org/#dom-textdecoder-decode
|
||||||
fn Decode(
|
fn Decode(
|
||||||
&self,
|
&self,
|
||||||
input: Option<ArrayBufferViewOrArrayBuffer>,
|
input: Option<ArrayBufferViewOrArrayBuffer>,
|
||||||
_options: &TextDecodeOptions
|
options: &TextDecodeOptions
|
||||||
) -> Fallible<USVString> {
|
) -> Fallible<USVString> {
|
||||||
|
// Step 1.
|
||||||
|
if !self.do_not_flush.get() {
|
||||||
|
if self.ignoreBOM {
|
||||||
|
self.decoder.replace(self.encoding.new_decoder_without_bom_handling());
|
||||||
|
} else {
|
||||||
|
self.decoder.replace(self.encoding.new_decoder());
|
||||||
|
}
|
||||||
|
self.in_stream.replace(Vec::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2.
|
||||||
|
self.do_not_flush.set(options.stream);
|
||||||
|
|
||||||
|
// Step 3.
|
||||||
match input {
|
match input {
|
||||||
Some(arr) => {
|
Some(ArrayBufferViewOrArrayBuffer::ArrayBufferView(ref a)) => {
|
||||||
let vec: Vec<u8> = match arr {
|
self.in_stream.borrow_mut().extend_from_slice(&a.to_vec());
|
||||||
ArrayBufferViewOrArrayBuffer::ArrayBufferView(ref a) => a.to_vec(),
|
},
|
||||||
ArrayBufferViewOrArrayBuffer::ArrayBuffer(ref a) => a.to_vec()
|
Some(ArrayBufferViewOrArrayBuffer::ArrayBuffer(ref a)) => {
|
||||||
|
self.in_stream.borrow_mut().extend_from_slice(&a.to_vec());
|
||||||
|
},
|
||||||
|
None => {},
|
||||||
};
|
};
|
||||||
let s = if self.fatal {
|
|
||||||
match self.encoding.decode_without_bom_handling_and_without_replacement(&vec) {
|
let mut decoder = self.decoder.borrow_mut();
|
||||||
Some(s) => s,
|
let (remaining, s) = {
|
||||||
None => return Err(Error::Type("Decoding failed".to_owned())),
|
let mut in_stream = self.in_stream.borrow_mut();
|
||||||
|
|
||||||
|
let (remaining, s) = if self.fatal {
|
||||||
|
// Step 4.
|
||||||
|
let mut out_stream = String::with_capacity(
|
||||||
|
decoder.max_utf8_buffer_length_without_replacement(in_stream.len()).unwrap()
|
||||||
|
);
|
||||||
|
// Step 5: Implemented by encoding_rs::Decoder.
|
||||||
|
match decoder.decode_to_string_without_replacement(&in_stream, &mut out_stream, !options.stream) {
|
||||||
|
(DecoderResult::InputEmpty, read) => {
|
||||||
|
(in_stream.split_off(read), out_stream)
|
||||||
|
},
|
||||||
|
// Step 5.3.3.
|
||||||
|
_ => return Err(Error::Type("Decoding failed".to_owned())),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let (s, _has_errors) = self.encoding.decode_without_bom_handling(&vec);
|
// Step 4.
|
||||||
s
|
let mut out_stream = String::with_capacity(decoder.max_utf8_buffer_length(in_stream.len()).unwrap());
|
||||||
|
// Step 5: Implemented by encoding_rs::Decoder.
|
||||||
|
let (_result, read, _replaced) = decoder.decode_to_string(&in_stream, &mut out_stream, !options.stream);
|
||||||
|
(in_stream.split_off(read), out_stream)
|
||||||
};
|
};
|
||||||
Ok(USVString(s.into_owned()))
|
(remaining, s)
|
||||||
}
|
};
|
||||||
None => Ok(USVString("".to_owned()))
|
self.in_stream.replace(remaining);
|
||||||
}
|
Ok(USVString(s))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,18 +5,18 @@
|
||||||
// https://encoding.spec.whatwg.org/#interface-textdecoder
|
// https://encoding.spec.whatwg.org/#interface-textdecoder
|
||||||
dictionary TextDecoderOptions {
|
dictionary TextDecoderOptions {
|
||||||
boolean fatal = false;
|
boolean fatal = false;
|
||||||
// boolean ignoreBOM = false;
|
boolean ignoreBOM = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
dictionary TextDecodeOptions {
|
dictionary TextDecodeOptions {
|
||||||
// boolean stream = false;
|
boolean stream = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
[Constructor(optional DOMString label = "utf-8", optional TextDecoderOptions options), Exposed=(Window,Worker)]
|
[Constructor(optional DOMString label = "utf-8", optional TextDecoderOptions options), Exposed=(Window,Worker)]
|
||||||
interface TextDecoder {
|
interface TextDecoder {
|
||||||
readonly attribute DOMString encoding;
|
readonly attribute DOMString encoding;
|
||||||
readonly attribute boolean fatal;
|
readonly attribute boolean fatal;
|
||||||
// readonly attribute boolean ignoreBOM;
|
readonly attribute boolean ignoreBOM;
|
||||||
[Throws]
|
[Throws]
|
||||||
USVString decode(optional BufferSource input, optional TextDecodeOptions options);
|
USVString decode(optional BufferSource input, optional TextDecodeOptions options);
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
[textdecoder-byte-order-marks.html]
|
|
||||||
type: testharness
|
|
||||||
bug: https://github.com/servo/servo/issues/13233
|
|
||||||
[Byte-order marks: utf-8]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Byte-order marks: utf-16le]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Byte-order marks: utf-16be]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
[textdecoder-copy.any.html]
|
|
||||||
type: testharness
|
|
||||||
[Modify buffer after passing it in]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
|
|
||||||
[textdecoder-copy.any.worker.html]
|
|
||||||
type: testharness
|
|
||||||
[Modify buffer after passing it in]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
|
@ -1,6 +0,0 @@
|
||||||
[textdecoder-fatal-streaming.html]
|
|
||||||
type: testharness
|
|
||||||
bug: https://github.com/servo/servo/issues/13234
|
|
||||||
[Fatal flag, streaming cases]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
|
@ -1,15 +0,0 @@
|
||||||
[textdecoder-ignorebom.html]
|
|
||||||
type: testharness
|
|
||||||
bug: https://github.com/servo/servo/issues/5600
|
|
||||||
[BOM is ignored if ignoreBOM option is specified: utf-8]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[BOM is ignored if ignoreBOM option is specified: utf-16le]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[BOM is ignored if ignoreBOM option is specified: utf-16be]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[The ignoreBOM attribute of TextDecoder]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
|
@ -1,48 +0,0 @@
|
||||||
[textdecoder-streaming.html]
|
|
||||||
type: testharness
|
|
||||||
bug: https://github.com/servo/servo/issues/13234
|
|
||||||
[Streaming decode: utf-16le, 1 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16le, 3 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16le, 5 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16be, 1 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16be, 3 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16be, 5 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-8, 1 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-8, 2 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-8, 3 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-8, 4 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-8, 5 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16le, 2 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16le, 4 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16be, 2 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
||||||
[Streaming decode: utf-16be, 4 byte window]
|
|
||||||
expected: FAIL
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue