Script: Implement TextDecoderStream (#38112)

This PR implements `TextDecoderStream`. Besides introducing the
necessary module and WebIDL files corresponding to `TextDecoderStream`,
this PR also involves some changes to `TextDecoder` and
`TransformStream`:

- The parts that can be shared between `TextDecoder` and
`TextDecoderStream` are extracted into a separate type,
`script::dom::textdecodercommon::TextDecoderCommon`. This type could
probably use a different name, because there is an interface called
`TextDecoderCommon` in the spec
(https://encoding.spec.whatwg.org/#textdecodercommon) which just gets
included in `TextDecoder` and `TextDecoderStream`.
- The three algorithms in `TransformStream` (`cancel`, `flush`, and
`transform`) have all become `enum`s with a `Js` variant for a JS
function object and a `Native` variant for a Rust trait object. Whether
the cancel algorithm needs this enum type is debatable, as I did not find
any interface in the spec that explicitly sets the cancel algorithm.

Testing: The existing WPT tests in `tests/wpt/tests/encoding/stream` should be
sufficient.
Fixes: #37723

---------

Signed-off-by: minghuaw <michael.wu1107@gmail.com>
Signed-off-by: minghuaw <wuminghua7@huawei.com>
Signed-off-by: Minghua Wu <michael.wu1107@gmail.com>
This commit is contained in:
minghuaw 2025-07-29 12:18:15 +08:00 committed by GitHub
parent 25822920cf
commit 554b2da1ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 797 additions and 752 deletions

View file

@ -3,10 +3,10 @@
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
use std::borrow::ToOwned;
use std::cell::{Cell, RefCell};
use std::cell::Cell;
use dom_struct::dom_struct;
use encoding_rs::{Decoder, DecoderResult, Encoding};
use encoding_rs::Encoding;
use js::rust::HandleObject;
use crate::dom::bindings::codegen::Bindings::TextDecoderBinding;
@ -19,37 +19,29 @@ use crate::dom::bindings::reflector::{Reflector, reflect_dom_object_with_proto};
use crate::dom::bindings::root::DomRoot;
use crate::dom::bindings::str::{DOMString, USVString};
use crate::dom::globalscope::GlobalScope;
use crate::dom::textdecodercommon::TextDecoderCommon;
use crate::script_runtime::CanGc;
/// <https://encoding.spec.whatwg.org/#textdecoder>
#[dom_struct]
#[allow(non_snake_case)]
pub(crate) struct TextDecoder {
reflector_: Reflector,
#[no_trace]
encoding: &'static Encoding,
fatal: bool,
ignoreBOM: bool,
#[ignore_malloc_size_of = "defined in encoding_rs"]
#[no_trace]
decoder: RefCell<Decoder>,
in_stream: RefCell<Vec<u8>>,
/// <https://encoding.spec.whatwg.org/#textdecodercommon>
decoder: TextDecoderCommon,
/// <https://encoding.spec.whatwg.org/#textdecoder-do-not-flush-flag>
do_not_flush: Cell<bool>,
}
#[allow(non_snake_case)]
impl TextDecoder {
fn new_inherited(encoding: &'static Encoding, fatal: bool, ignoreBOM: bool) -> TextDecoder {
let decoder = TextDecoderCommon::new_inherited(encoding, fatal, ignoreBOM);
TextDecoder {
reflector_: Reflector::new(),
encoding,
fatal,
ignoreBOM,
decoder: RefCell::new(if ignoreBOM {
encoding.new_decoder()
} else {
encoding.new_decoder_without_bom_handling()
}),
in_stream: RefCell::new(Vec::new()),
decoder,
do_not_flush: Cell::new(false),
}
}
@ -77,6 +69,7 @@ impl TextDecoder {
}
}
#[allow(non_snake_case)]
impl TextDecoderMethods<crate::DomTypeHolder> for TextDecoder {
/// <https://encoding.spec.whatwg.org/#dom-textdecoder>
fn Constructor(
@ -100,85 +93,59 @@ impl TextDecoderMethods<crate::DomTypeHolder> for TextDecoder {
))
}
// https://encoding.spec.whatwg.org/#dom-textdecoder-encoding
/// <https://encoding.spec.whatwg.org/#dom-textdecoder-encoding>
fn Encoding(&self) -> DOMString {
DOMString::from(self.encoding.name().to_ascii_lowercase())
DOMString::from(self.decoder.encoding().name().to_ascii_lowercase())
}
// https://encoding.spec.whatwg.org/#dom-textdecoder-fatal
/// <https://encoding.spec.whatwg.org/#dom-textdecoder-fatal>
fn Fatal(&self) -> bool {
self.fatal
self.decoder.fatal()
}
// https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom
/// <https://encoding.spec.whatwg.org/#dom-textdecoder-ignorebom>
fn IgnoreBOM(&self) -> bool {
self.ignoreBOM
self.decoder.ignore_bom()
}
// https://encoding.spec.whatwg.org/#dom-textdecoder-decode
/// <https://encoding.spec.whatwg.org/#dom-textdecoder-decode>
fn Decode(
&self,
input: Option<ArrayBufferViewOrArrayBuffer>,
options: &TextDecodeOptions,
) -> Fallible<USVString> {
// Step 1.
// Step 1. If this's do not flush is false, then set this's decoder to a new
// instance of this's encoding's decoder, this's I/O queue to the I/O queue
// of bytes « end-of-queue », and this's BOM seen to false.
if !self.do_not_flush.get() {
if self.ignoreBOM {
if self.decoder.ignore_bom() {
self.decoder
.replace(self.encoding.new_decoder_without_bom_handling());
.decoder()
.replace(self.decoder.encoding().new_decoder_without_bom_handling());
} else {
self.decoder.replace(self.encoding.new_decoder());
self.decoder
.decoder()
.replace(self.decoder.encoding().new_decoder_with_bom_removal());
}
self.in_stream.replace(Vec::new());
self.decoder.io_queue().replace(Vec::new());
}
// Step 2.
// Step 2. Set this's do not flush to options["stream"].
self.do_not_flush.set(options.stream);
// Step 3.
match input {
Some(ArrayBufferViewOrArrayBuffer::ArrayBufferView(ref a)) => {
self.in_stream.borrow_mut().extend_from_slice(&a.to_vec());
},
Some(ArrayBufferViewOrArrayBuffer::ArrayBuffer(ref a)) => {
self.in_stream.borrow_mut().extend_from_slice(&a.to_vec());
},
None => {},
};
let mut decoder = self.decoder.borrow_mut();
let (remaining, s) = {
let mut in_stream = self.in_stream.borrow_mut();
let (remaining, s) = if self.fatal {
// Step 4.
let mut out_stream = String::with_capacity(
decoder
.max_utf8_buffer_length_without_replacement(in_stream.len())
.unwrap(),
);
// Step 5: Implemented by encoding_rs::Decoder.
match decoder.decode_to_string_without_replacement(
&in_stream,
&mut out_stream,
!options.stream,
) {
(DecoderResult::InputEmpty, read) => (in_stream.split_off(read), out_stream),
// Step 5.3.3.
_ => return Err(Error::Type("Decoding failed".to_owned())),
}
} else {
// Step 4.
let mut out_stream =
String::with_capacity(decoder.max_utf8_buffer_length(in_stream.len()).unwrap());
// Step 5: Implemented by encoding_rs::Decoder.
let (_result, read, _replaced) =
decoder.decode_to_string(&in_stream, &mut out_stream, !options.stream);
(in_stream.split_off(read), out_stream)
};
(remaining, s)
};
self.in_stream.replace(remaining);
Ok(USVString(s))
// Step 3. If input is given, then push a copy of input to this's I/O queue.
// Step 4. Let output be the I/O queue of scalar values « end-of-queue ».
// Step 5. While true:
// Step 5.1 Let item be the result of reading from this's I/O queue.
// Step 5.2 If item is end-of-queue and this's do not flush is true,
// then return the result of running serialize I/O queue with this and output.
// Step 5.3 Otherwise:
// Step 5.3.1 Let result be the result of processing an item with item, this's decoder,
// this's I/O queue, output, and this's error mode.
// Step 5.3.2 If result is finished, then return the result of running serialize I/O
// queue with this and output.
self.decoder
.decode(input.as_ref(), !options.stream)
.map(USVString)
}
}