/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ use std::borrow::Cow; use std::cell::Cell; use std::rc::Rc; use base::cross_process_instant::CrossProcessInstant; use base::id::PipelineId; use base64::Engine as _; use base64::engine::general_purpose; use content_security_policy::sandboxing_directive::SandboxingFlagSet; use devtools_traits::ScriptToDevtoolsControlMsg; use dom_struct::dom_struct; use embedder_traits::resources::{self, Resource}; use encoding_rs::Encoding; use html5ever::buffer_queue::BufferQueue; use html5ever::tendril::fmt::UTF8; use html5ever::tendril::{ByteTendril, StrTendril, TendrilSink}; use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::{Attribute, ExpandedName, LocalName, QualName, local_name, ns}; use hyper_serde::Serde; use markup5ever::TokenizerResult; use mime::{self, Mime}; use net_traits::mime_classifier::{ApacheBugFlag, MediaType, MimeClassifier, NoSniffFlag}; use net_traits::policy_container::PolicyContainer; use net_traits::request::RequestId; use net_traits::{ FetchMetadata, FetchResponseListener, LoadContext, Metadata, NetworkError, ReferrerPolicy, ResourceFetchTiming, ResourceTimingType, }; use profile_traits::time::{ ProfilerCategory, ProfilerChan, TimerMetadata, TimerMetadataFrameType, TimerMetadataReflowType, }; use profile_traits::time_profile; use script_traits::DocumentActivity; use servo_config::pref; use servo_url::ServoUrl; use style::context::QuirksMode as ServoQuirksMode; use tendril::stream::LossyDecoder; use crate::document_loader::{DocumentLoader, LoadType}; use crate::dom::bindings::cell::DomRefCell; use crate::dom::bindings::codegen::Bindings::DocumentBinding::{ DocumentMethods, DocumentReadyState, }; use crate::dom::bindings::codegen::Bindings::HTMLImageElementBinding::HTMLImageElementMethods; use 
crate::dom::bindings::codegen::Bindings::HTMLMediaElementBinding::HTMLMediaElementMethods; use crate::dom::bindings::codegen::Bindings::HTMLTemplateElementBinding::HTMLTemplateElementMethods; use crate::dom::bindings::codegen::Bindings::NodeBinding::NodeMethods; use crate::dom::bindings::codegen::Bindings::ShadowRootBinding::{ ShadowRootMode, SlotAssignmentMode, }; use crate::dom::bindings::inheritance::Castable; use crate::dom::bindings::refcounted::Trusted; use crate::dom::bindings::reflector::{DomGlobal, Reflector, reflect_dom_object}; use crate::dom::bindings::root::{Dom, DomRoot, MutNullableDom}; use crate::dom::bindings::settings_stack::is_execution_stack_empty; use crate::dom::bindings::str::{DOMString, USVString}; use crate::dom::characterdata::CharacterData; use crate::dom::comment::Comment; use crate::dom::csp::{GlobalCspReporting, Violation, parse_csp_list_from_metadata}; use crate::dom::customelementregistry::CustomElementReactionStack; use crate::dom::document::{Document, DocumentSource, HasBrowsingContext, IsHTMLDocument}; use crate::dom::documentfragment::DocumentFragment; use crate::dom::documenttype::DocumentType; use crate::dom::element::{CustomElementCreationMode, Element, ElementCreator}; use crate::dom::html::htmlformelement::{FormControlElementHelpers, HTMLFormElement}; use crate::dom::html::htmlimageelement::HTMLImageElement; use crate::dom::html::htmlinputelement::HTMLInputElement; use crate::dom::html::htmlscriptelement::{HTMLScriptElement, ScriptResult}; use crate::dom::html::htmltemplateelement::HTMLTemplateElement; use crate::dom::node::{Node, ShadowIncluding}; use crate::dom::performanceentry::PerformanceEntry; use crate::dom::performancenavigationtiming::PerformanceNavigationTiming; use crate::dom::processinginstruction::ProcessingInstruction; use crate::dom::reportingendpoint::ReportingEndpoint; use crate::dom::shadowroot::IsUserAgentWidget; use crate::dom::text::Text; use crate::dom::types::HTMLMediaElement; use 
crate::dom::virtualmethods::vtable_for; use crate::network_listener::PreInvoke; use crate::realms::enter_realm; use crate::script_runtime::{CanGc, IntroductionType}; use crate::script_thread::ScriptThread; mod async_html; mod html; mod prefetch; mod xml; pub(crate) use html::serialize_html_fragment; #[dom_struct] /// The parser maintains two input streams: one for input from script through /// document.write(), and one for input from network. /// /// There is no concrete representation of the insertion point, instead it /// always points to just before the next character from the network input, /// with all of the script input before itself. /// /// ```text /// ... script input ... | ... network input ... /// ^ /// insertion point /// ``` pub(crate) struct ServoParser { reflector: Reflector, /// The document associated with this parser. document: Dom, /// The BOM sniffing state. /// /// `None` means we've found the BOM, we've found there isn't one, or /// we're not parsing from a byte stream. `Some` contains the BOM bytes /// found so far. bom_sniff: DomRefCell>>, /// The decoder used for the network input. network_decoder: DomRefCell>, /// Input received from network. #[ignore_malloc_size_of = "Defined in html5ever"] #[no_trace] network_input: BufferQueue, /// Input received from script. Used only to support document.write(). #[ignore_malloc_size_of = "Defined in html5ever"] #[no_trace] script_input: BufferQueue, /// The tokenizer of this parser. tokenizer: Tokenizer, /// Whether to expect any further input from the associated network request. last_chunk_received: Cell, /// Whether this parser should avoid passing any further data to the tokenizer. suspended: Cell, /// script_nesting_level: Cell, /// aborted: Cell, /// script_created_parser: bool, /// We do a quick-and-dirty parse of the input looking for resources to prefetch. // TODO: if we had speculative parsing, we could do this when speculatively // building the DOM. 
https://github.com/servo/servo/pull/19203 prefetch_tokenizer: prefetch::Tokenizer, #[ignore_malloc_size_of = "Defined in html5ever"] #[no_trace] prefetch_input: BufferQueue, // The whole input as a string, if needed for the devtools Sources panel. // TODO: use a faster type for concatenating strings? content_for_devtools: Option>, } pub(crate) struct ElementAttribute { name: QualName, value: DOMString, } #[derive(Clone, Copy, JSTraceable, MallocSizeOf, PartialEq)] pub(crate) enum ParsingAlgorithm { Normal, Fragment, } impl ElementAttribute { pub(crate) fn new(name: QualName, value: DOMString) -> ElementAttribute { ElementAttribute { name, value } } } impl ServoParser { pub(crate) fn parser_is_not_active(&self) -> bool { self.can_write() } /// pub(crate) fn parse_html_document( document: &Document, input: Option, url: ServoUrl, can_gc: CanGc, ) { // Step 1. Set document's type to "html". // // Set by callers of this function and asserted here assert!(document.is_html_document()); // Step 2. Create an HTML parser parser, associated with document. let parser = if pref!(dom_servoparser_async_html_tokenizer_enabled) { ServoParser::new( document, Tokenizer::AsyncHtml(self::async_html::Tokenizer::new(document, url, None)), ParserKind::Normal, can_gc, ) } else { ServoParser::new( document, Tokenizer::Html(self::html::Tokenizer::new( document, url, None, ParsingAlgorithm::Normal, )), ParserKind::Normal, can_gc, ) }; // Step 3. Place html into the input stream for parser. The encoding confidence is irrelevant. // Step 4. Start parser and let it run until it has consumed all the // characters just inserted into the input stream. // // Set as the document's current parser and initialize with `input`, if given. 
if let Some(input) = input { parser.parse_complete_string_chunk(String::from(input), can_gc); } else { parser.document.set_current_parser(Some(&parser)); } } /// pub(crate) fn parse_html_fragment( context: &Element, input: DOMString, allow_declarative_shadow_roots: bool, can_gc: CanGc, ) -> impl Iterator> + use<'_> { let context_node = context.upcast::(); let context_document = context_node.owner_doc(); let window = context_document.window(); let url = context_document.url(); // Step 1. Let document be a Document node whose type is "html". let loader = DocumentLoader::new_with_threads( context_document.loader().resource_threads().clone(), Some(url.clone()), ); let document = Document::new( window, HasBrowsingContext::No, Some(url.clone()), context_document.origin().clone(), IsHTMLDocument::HTMLDocument, None, None, DocumentActivity::Inactive, DocumentSource::FromParser, loader, None, None, Default::default(), false, allow_declarative_shadow_roots, Some(context_document.insecure_requests_policy()), context_document.has_trustworthy_ancestor_or_current_origin(), context_document.custom_element_reaction_stack(), can_gc, ); // Step 2. If context's node document is in quirks mode, then set document's mode to "quirks". // Step 3. Otherwise, if context's node document is in limited-quirks mode, then set document's // mode to "limited-quirks". document.set_quirks_mode(context_document.quirks_mode()); // NOTE: The following steps happened as part of Step 1. // Step 4. If allowDeclarativeShadowRoots is true, then set document's // allow declarative shadow roots to true. // Step 5. Create a new HTML parser, and associate it with document. // Step 11. 
let form = context_node .inclusive_ancestors(ShadowIncluding::No) .find(|element| element.is::()); let fragment_context = FragmentContext { context_elem: context_node, form_elem: form.as_deref(), context_element_allows_scripting: context_document.scripting_enabled(), }; let parser = ServoParser::new( &document, Tokenizer::Html(self::html::Tokenizer::new( &document, url, Some(fragment_context), ParsingAlgorithm::Fragment, )), ParserKind::Normal, can_gc, ); parser.parse_complete_string_chunk(String::from(input), can_gc); // Step 14. let root_element = document.GetDocumentElement().expect("no document element"); FragmentParsingResult { inner: root_element.upcast::().children(), } } pub(crate) fn parse_html_script_input(document: &Document, url: ServoUrl) { let parser = ServoParser::new( document, Tokenizer::Html(self::html::Tokenizer::new( document, url, None, ParsingAlgorithm::Normal, )), ParserKind::ScriptCreated, CanGc::note(), ); *parser.bom_sniff.borrow_mut() = None; document.set_current_parser(Some(&parser)); } pub(crate) fn parse_xml_document( document: &Document, input: Option, url: ServoUrl, can_gc: CanGc, ) { let parser = ServoParser::new( document, Tokenizer::Xml(self::xml::Tokenizer::new(document, url)), ParserKind::Normal, can_gc, ); // Set as the document's current parser and initialize with `input`, if given. if let Some(input) = input { parser.parse_complete_string_chunk(String::from(input), can_gc); } else { parser.document.set_current_parser(Some(&parser)); } } pub(crate) fn script_nesting_level(&self) -> usize { self.script_nesting_level.get() } pub(crate) fn is_script_created(&self) -> bool { self.script_created_parser } /// Corresponds to the latter part of the "Otherwise" branch of the 'An end /// tag whose tag name is "script"' of /// /// /// This first moves everything from the script input to the beginning of /// the network input, effectively resetting the insertion point to just /// before the next character to be consumed. 
/// /// /// ```text /// | ... script input ... network input ... /// ^ /// insertion point /// ``` pub(crate) fn resume_with_pending_parsing_blocking_script( &self, script: &HTMLScriptElement, result: ScriptResult, can_gc: CanGc, ) { assert!(self.suspended.get()); self.suspended.set(false); self.script_input.swap_with(&self.network_input); while let Some(chunk) = self.script_input.pop_front() { self.network_input.push_back(chunk); } let script_nesting_level = self.script_nesting_level.get(); assert_eq!(script_nesting_level, 0); self.script_nesting_level.set(script_nesting_level + 1); script.execute(result, can_gc); self.script_nesting_level.set(script_nesting_level); if !self.suspended.get() && !self.aborted.get() { self.parse_sync(can_gc); } } pub(crate) fn can_write(&self) -> bool { self.script_created_parser || self.script_nesting_level.get() > 0 } /// Steps 6-8 of pub(crate) fn write(&self, text: DOMString, can_gc: CanGc) { assert!(self.can_write()); if self.document.has_pending_parsing_blocking_script() { // There is already a pending parsing blocking script so the // parser is suspended, we just append everything to the // script input and abort these steps. self.script_input.push_back(String::from(text).into()); return; } // There is no pending parsing blocking script, so all previous calls // to document.write() should have seen their entire input tokenized // and process, with nothing pushed to the parser script input. 
assert!(self.script_input.is_empty()); let input = BufferQueue::default(); input.push_back(String::from(text).into()); let profiler_chan = self .document .window() .as_global_scope() .time_profiler_chan() .clone(); let profiler_metadata = TimerMetadata { url: self.document.url().as_str().into(), iframe: TimerMetadataFrameType::RootWindow, incremental: TimerMetadataReflowType::FirstReflow, }; self.tokenize( |tokenizer| { tokenizer.feed( &input, can_gc, profiler_chan.clone(), profiler_metadata.clone(), ) }, can_gc, ); if self.suspended.get() { // Parser got suspended, insert remaining input at end of // script input, following anything written by scripts executed // reentrantly during this call. while let Some(chunk) = input.pop_front() { self.script_input.push_back(chunk); } return; } assert!(input.is_empty()); } // Steps 4-6 of https://html.spec.whatwg.org/multipage/#dom-document-close pub(crate) fn close(&self, can_gc: CanGc) { assert!(self.script_created_parser); // Step 4. self.last_chunk_received.set(true); if self.suspended.get() { // Step 5. return; } // Step 6. self.parse_sync(can_gc); } // https://html.spec.whatwg.org/multipage/#abort-a-parser pub(crate) fn abort(&self, can_gc: CanGc) { assert!(!self.aborted.get()); self.aborted.set(true); // Step 1. self.script_input.replace_with(BufferQueue::default()); self.network_input.replace_with(BufferQueue::default()); // Step 2. self.document .set_ready_state(DocumentReadyState::Interactive, can_gc); // Step 3. self.tokenizer.end(can_gc); self.document.set_current_parser(None); // Step 4. 
self.document .set_ready_state(DocumentReadyState::Complete, can_gc); } // https://html.spec.whatwg.org/multipage/#active-parser pub(crate) fn is_active(&self) -> bool { self.script_nesting_level() > 0 && !self.aborted.get() } #[cfg_attr(crown, allow(crown::unrooted_must_root))] fn new_inherited(document: &Document, tokenizer: Tokenizer, kind: ParserKind) -> Self { // Store the whole input for the devtools Sources panel, if the devtools server is running // and we are parsing for a document load (not just things like innerHTML). // TODO: check if a devtools client is actually connected and/or wants the sources? let content_for_devtools = (document.global().devtools_chan().is_some() && document.has_browsing_context()) .then_some(DomRefCell::new(String::new())); ServoParser { reflector: Reflector::new(), document: Dom::from_ref(document), bom_sniff: DomRefCell::new(Some(Vec::with_capacity(3))), network_decoder: DomRefCell::new(Some(NetworkDecoder::new(document.encoding()))), network_input: BufferQueue::default(), script_input: BufferQueue::default(), tokenizer, last_chunk_received: Cell::new(false), suspended: Default::default(), script_nesting_level: Default::default(), aborted: Default::default(), script_created_parser: kind == ParserKind::ScriptCreated, prefetch_tokenizer: prefetch::Tokenizer::new(document), prefetch_input: BufferQueue::default(), content_for_devtools, } } #[cfg_attr(crown, allow(crown::unrooted_must_root))] fn new( document: &Document, tokenizer: Tokenizer, kind: ParserKind, can_gc: CanGc, ) -> DomRoot { reflect_dom_object( Box::new(ServoParser::new_inherited(document, tokenizer, kind)), document.window(), can_gc, ) } fn push_tendril_input_chunk(&self, chunk: StrTendril) { if let Some(mut content_for_devtools) = self .content_for_devtools .as_ref() .map(|content| content.borrow_mut()) { // TODO: append these chunks more efficiently content_for_devtools.push_str(chunk.as_ref()); } if chunk.is_empty() { return; } // Per 
https://github.com/whatwg/html/issues/1495 // stylesheets should not be loaded for documents // without browsing contexts. // https://github.com/whatwg/html/issues/1495#issuecomment-230334047 // suggests that no content should be preloaded in such a case. // We're conservative, and only prefetch for documents // with browsing contexts. if self.document.browsing_context().is_some() { // Push the chunk into the prefetch input stream, // which is tokenized eagerly, to scan for resources // to prefetch. If the user script uses `document.write()` // to overwrite the network input, this prefetching may // have been wasted, but in most cases it won't. self.prefetch_input.push_back(chunk.clone()); self.prefetch_tokenizer.feed(&self.prefetch_input); } // Push the chunk into the network input stream, // which is tokenized lazily. self.network_input.push_back(chunk); } fn push_bytes_input_chunk(&self, chunk: Vec) { // BOM sniff. This is needed because NetworkDecoder will switch the // encoding based on the BOM, but it won't change // `self.document.encoding` in the process. { let mut bom_sniff = self.bom_sniff.borrow_mut(); if let Some(partial_bom) = bom_sniff.as_mut() { if partial_bom.len() + chunk.len() >= 3 { partial_bom.extend(chunk.iter().take(3 - partial_bom.len()).copied()); if let Some((encoding, _)) = Encoding::for_bom(partial_bom) { self.document.set_encoding(encoding); } drop(bom_sniff); *self.bom_sniff.borrow_mut() = None; } else { partial_bom.extend(chunk.iter().copied()); } } } // For byte input, we convert it to text using the network decoder. let chunk = self .network_decoder .borrow_mut() .as_mut() .unwrap() .decode(chunk); self.push_tendril_input_chunk(chunk); } fn push_string_input_chunk(&self, chunk: String) { // If the input is a string, we don't have a BOM. if self.bom_sniff.borrow().is_some() { *self.bom_sniff.borrow_mut() = None; } // The input has already been decoded as a string, so doesn't need // to be decoded by the network decoder again. 
let chunk = StrTendril::from(chunk); self.push_tendril_input_chunk(chunk); } fn parse_sync(&self, can_gc: CanGc) { assert!(self.script_input.is_empty()); // This parser will continue to parse while there is either pending input or // the parser remains unsuspended. if self.last_chunk_received.get() { if let Some(decoder) = self.network_decoder.borrow_mut().take() { let chunk = decoder.finish(); if !chunk.is_empty() { self.network_input.push_back(chunk); } } } if self.aborted.get() { return; } let profiler_chan = self .document .window() .as_global_scope() .time_profiler_chan() .clone(); let profiler_metadata = TimerMetadata { url: self.document.url().as_str().into(), iframe: TimerMetadataFrameType::RootWindow, incremental: TimerMetadataReflowType::FirstReflow, }; self.tokenize( |tokenizer| { tokenizer.feed( &self.network_input, can_gc, profiler_chan.clone(), profiler_metadata.clone(), ) }, can_gc, ); if self.suspended.get() { return; } assert!(self.network_input.is_empty()); if self.last_chunk_received.get() { self.finish(can_gc); } } fn parse_complete_string_chunk(&self, input: String, can_gc: CanGc) { self.document.set_current_parser(Some(self)); self.push_string_input_chunk(input); self.last_chunk_received.set(true); if !self.suspended.get() { self.parse_sync(can_gc); } } fn parse_bytes_chunk(&self, input: Vec, can_gc: CanGc) { let _realm = enter_realm(&*self.document); self.document.set_current_parser(Some(self)); self.push_bytes_input_chunk(input); if !self.suspended.get() { self.parse_sync(can_gc); } } fn tokenize(&self, feed: F, can_gc: CanGc) where F: Fn(&Tokenizer) -> TokenizerResult>, { loop { assert!(!self.suspended.get()); assert!(!self.aborted.get()); self.document.window().reflow_if_reflow_timer_expired(); let script = match feed(&self.tokenizer) { TokenizerResult::Done => return, TokenizerResult::Script(script) => script, }; // https://html.spec.whatwg.org/multipage/#parsing-main-incdata // branch "An end tag whose tag name is "script" // The spec 
says to perform the microtask checkpoint before // setting the insertion mode back from Text, but this is not // possible with the way servo and html5ever currently // relate to each other, and hopefully it is not observable. if is_execution_stack_empty() { self.document .window() .as_global_scope() .perform_a_microtask_checkpoint(can_gc); } let script_nesting_level = self.script_nesting_level.get(); self.script_nesting_level.set(script_nesting_level + 1); script.set_initial_script_text(); let introduction_type_override = (script_nesting_level > 0).then_some(IntroductionType::INJECTED_SCRIPT); script.prepare(introduction_type_override, can_gc); self.script_nesting_level.set(script_nesting_level); if self.document.has_pending_parsing_blocking_script() { self.suspended.set(true); return; } if self.aborted.get() { return; } } } // https://html.spec.whatwg.org/multipage/#the-end fn finish(&self, can_gc: CanGc) { assert!(!self.suspended.get()); assert!(self.last_chunk_received.get()); assert!(self.script_input.is_empty()); assert!(self.network_input.is_empty()); assert!(self.network_decoder.borrow().is_none()); // Step 1. self.document .set_ready_state(DocumentReadyState::Interactive, can_gc); // Step 2. self.tokenizer.end(can_gc); self.document.set_current_parser(None); // Steps 3-12 are in another castle, namely finish_load. let url = self.tokenizer.url().clone(); self.document.finish_load(LoadType::PageSource(url), can_gc); // Send the source contents to devtools, if needed. 
if let Some(content_for_devtools) = self .content_for_devtools .as_ref() .map(|content| content.take()) { let global = self.document.global(); let chan = global.devtools_chan().expect("Guaranteed by new"); let pipeline_id = self.document.global().pipeline_id(); let _ = chan.send(ScriptToDevtoolsControlMsg::UpdateSourceContent( pipeline_id, content_for_devtools, )); } } } struct FragmentParsingResult where I: Iterator>, { inner: I, } impl Iterator for FragmentParsingResult where I: Iterator>, { type Item = DomRoot; fn next(&mut self) -> Option> { let next = self.inner.next()?; next.remove_self(CanGc::note()); Some(next) } fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } } #[derive(JSTraceable, MallocSizeOf, PartialEq)] enum ParserKind { Normal, ScriptCreated, } #[derive(JSTraceable, MallocSizeOf)] #[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)] enum Tokenizer { Html(self::html::Tokenizer), AsyncHtml(self::async_html::Tokenizer), Xml(self::xml::Tokenizer), } impl Tokenizer { fn feed( &self, input: &BufferQueue, can_gc: CanGc, profiler_chan: ProfilerChan, profiler_metadata: TimerMetadata, ) -> TokenizerResult> { match *self { Tokenizer::Html(ref tokenizer) => time_profile!( ProfilerCategory::ScriptParseHTML, Some(profiler_metadata), profiler_chan, || tokenizer.feed(input), ), Tokenizer::AsyncHtml(ref tokenizer) => time_profile!( ProfilerCategory::ScriptParseHTML, Some(profiler_metadata), profiler_chan, || tokenizer.feed(input, can_gc), ), Tokenizer::Xml(ref tokenizer) => time_profile!( ProfilerCategory::ScriptParseXML, Some(profiler_metadata), profiler_chan, || tokenizer.feed(input), ), } } fn end(&self, can_gc: CanGc) { match *self { Tokenizer::Html(ref tokenizer) => tokenizer.end(), Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.end(can_gc), Tokenizer::Xml(ref tokenizer) => tokenizer.end(), } } fn url(&self) -> &ServoUrl { match *self { Tokenizer::Html(ref tokenizer) => tokenizer.url(), Tokenizer::AsyncHtml(ref tokenizer) => 
tokenizer.url(), Tokenizer::Xml(ref tokenizer) => tokenizer.url(), } } fn set_plaintext_state(&self) { match *self { Tokenizer::Html(ref tokenizer) => tokenizer.set_plaintext_state(), Tokenizer::AsyncHtml(ref tokenizer) => tokenizer.set_plaintext_state(), Tokenizer::Xml(_) => unimplemented!(), } } } /// /// This does not have the relevant fields, but mimics the intent /// of the struct when used in loading document spec algorithms. struct NavigationParams { /// policy_container: PolicyContainer, /// content-type of this document, if known. Otherwise need to sniff it content_type: Option, /// final_sandboxing_flag_set: SandboxingFlagSet, /// resource_header: Vec, } /// The context required for asynchronously fetching a document /// and parsing it progressively. pub(crate) struct ParserContext { /// The parser that initiated the request. parser: Option>, /// Is this a synthesized document is_synthesized_document: bool, /// Has a document already been loaded (relevant for checking the resource header) has_loaded_document: bool, /// The pipeline associated with this document. id: PipelineId, /// The URL for this document. 
url: ServoUrl, /// timing data for this resource resource_timing: ResourceFetchTiming, /// pushed entry index pushed_entry_index: Option, /// params required in document load algorithms navigation_params: NavigationParams, } impl ParserContext { pub(crate) fn new(id: PipelineId, url: ServoUrl) -> ParserContext { ParserContext { parser: None, is_synthesized_document: false, has_loaded_document: false, id, url, resource_timing: ResourceFetchTiming::new(ResourceTimingType::Navigation), pushed_entry_index: None, navigation_params: NavigationParams { policy_container: Default::default(), content_type: None, final_sandboxing_flag_set: SandboxingFlagSet::empty(), resource_header: vec![], }, } } pub(crate) fn set_policy_container(&mut self, policy_container: Option<&PolicyContainer>) { let Some(policy_container) = policy_container else { return; }; self.navigation_params.policy_container = policy_container.clone(); } /// fn create_policy_container_from_fetch_response(metadata: &Metadata) -> PolicyContainer { // Step 1. If response's URL's scheme is "blob", then return a clone of response's URL's blob URL entry's environment's policy container. // TODO // Step 2. Let result be a new policy container. // Step 7. Return result. PolicyContainer { // Step 3. Set result's CSP list to the result of parsing a response's Content Security Policies given response. csp_list: parse_csp_list_from_metadata(&metadata.headers), // Step 5. Set result's referrer policy to the result of parsing the `Referrer-Policy` header given response. [REFERRERPOLICY] referrer_policy: ReferrerPolicy::parse_header_for_response(&metadata.headers), } } /// fn initialize_document_object(&self, document: &Document) { // Step 9. 
Let document be a new Document, with document.set_policy_container(self.navigation_params.policy_container.clone()); document.set_active_sandboxing_flag_set(self.navigation_params.final_sandboxing_flag_set); } /// fn load_document(&mut self, can_gc: CanGc) { assert!(!self.has_loaded_document); self.has_loaded_document = true; let Some(ref parser) = self.parser.as_ref().map(|p| p.root()) else { return; }; // Step 1. Let type be the computed type of navigationParams's response. let content_type = &self.navigation_params.content_type; let mime_type = MimeClassifier::default().classify( LoadContext::Browsing, NoSniffFlag::Off, ApacheBugFlag::from_content_type(content_type.as_ref()), content_type, &self.navigation_params.resource_header, ); // Step 2. If the user agent has been configured to process resources of the given type using // some mechanism other than rendering the content in a navigable, then skip this step. // Otherwise, if the type is one of the following types: let Some(media_type) = MimeClassifier::get_media_type(&mime_type) else { let page = format!( "

Unknown content type ({}).

", &mime_type, ); self.load_inline_unknown_content(parser, page); return; }; match media_type { // Return the result of loading an HTML document, given navigationParams. MediaType::Html => self.load_html_document(parser), // Return the result of loading an XML document given navigationParams and type. MediaType::Xml => self.load_xml_document(parser), // Return the result of loading a text document given navigationParams and type. MediaType::JavaScript | MediaType::Json | MediaType::Text | MediaType::Css => { self.load_text_document(parser) }, // Return the result of loading a media document given navigationParams and type. MediaType::Image | MediaType::AudioVideo => { self.load_media_document(parser, media_type, &mime_type) }, MediaType::Font => { let page = format!( "

Unable to load font with content type ({}).

", &mime_type, ); self.load_inline_unknown_content(parser, page); return; }, }; parser.parse_bytes_chunk( std::mem::take(&mut self.navigation_params.resource_header), can_gc, ); } /// fn load_html_document(&self, parser: &ServoParser) { // Step 1. Let document be the result of creating and initializing a // Document object given "html", "text/html", and navigationParams. self.initialize_document_object(&parser.document); } /// fn load_xml_document(&self, parser: &ServoParser) { // When faced with displaying an XML file inline, provided navigation params navigationParams // and a string type, user agents must follow the requirements defined in XML and Namespaces in XML, // XML Media Types, DOM, and other relevant specifications to create and initialize a // Document object document, given "xml", type, and navigationParams, and return that Document. // They must also create a corresponding XML parser. [XML] [XMLNS] [RFC7303] [DOM] self.initialize_document_object(&parser.document); } /// fn load_text_document(&self, parser: &ServoParser) { // Step 4. Create an HTML parser and associate it with the document. // Act as if the tokenizer had emitted a start tag token with the tag name "pre" followed by // a single U+000A LINE FEED (LF) character, and switch the HTML parser's tokenizer to the PLAINTEXT state. // Each task that the networking task source places on the task queue while fetching runs must then // fill the parser's input byte stream with the fetched bytes and cause the HTML parser to perform // the appropriate processing of the input stream. let page = "
\n".into();
        parser.push_string_input_chunk(page);
        parser.parse_sync(CanGc::note());
        parser.tokenizer.set_plaintext_state();
    }

    /// 
    fn load_media_document(
        &mut self,
        parser: &ServoParser,
        media_type: MediaType,
        mime_type: &Mime,
    ) {
        // Step 8. Act as if the user agent had stopped parsing document.
        self.is_synthesized_document = true;
        // Step 3. Populate with html/head/body given document.
        let page = "".into();
        parser.push_string_input_chunk(page);
        parser.parse_sync(CanGc::note());

        let doc = &parser.document;
        // Step 5. Set the appropriate attribute of the element host element, as described below,
        // to the address of the image, video, or audio resource.
        let node = if media_type == MediaType::Image {
            let img = Element::create(
                QualName::new(None, ns!(html), local_name!("img")),
                None,
                doc,
                ElementCreator::ParserCreated(1),
                CustomElementCreationMode::Asynchronous,
                None,
                CanGc::note(),
            );
            let img = DomRoot::downcast::(img).unwrap();
            img.SetSrc(USVString(self.url.to_string()));
            DomRoot::upcast::(img)
        } else if mime_type.type_() == mime::AUDIO {
            let audio = Element::create(
                QualName::new(None, ns!(html), local_name!("audio")),
                None,
                doc,
                ElementCreator::ParserCreated(1),
                CustomElementCreationMode::Asynchronous,
                None,
                CanGc::note(),
            );
            let audio = DomRoot::downcast::(audio).unwrap();
            audio.SetSrc(USVString(self.url.to_string()));
            DomRoot::upcast::(audio)
        } else {
            let video = Element::create(
                QualName::new(None, ns!(html), local_name!("video")),
                None,
                doc,
                ElementCreator::ParserCreated(1),
                CustomElementCreationMode::Asynchronous,
                None,
                CanGc::note(),
            );
            let video = DomRoot::downcast::(video).unwrap();
            video.SetSrc(USVString(self.url.to_string()));
            DomRoot::upcast::(video)
        };
        // Step 4. Append an element host element for the media, as described below, to the body element.
        let doc_body = DomRoot::upcast::(doc.GetBody().unwrap());
        doc_body
            .AppendChild(&node, CanGc::note())
            .expect("Appending failed");
    }

    /// 
    /// Shows `page` as the content of a synthesized document.
    ///
    /// Flags this context as a synthesized document, queues `page` as
    /// already-decoded string input on `parser`, and parses it synchronously.
    ///
    /// * `parser` - the parser that receives the markup.
    /// * `page` - the markup to display.
    fn load_inline_unknown_content(&mut self, parser: &ServoParser, page: String) {
        // The markup is generated here rather than fetched, so record that
        // this document is synthesized before any parsing happens.
        self.is_synthesized_document = true;

        // Queue the markup first, then run the parser over what was queued.
        parser.push_string_input_chunk(page);
        let can_gc = CanGc::note();
        parser.parse_sync(can_gc);
    }
}

impl FetchResponseListener for ParserContext {
    /// Intentionally empty: this listener does nothing when the request body is processed.
    fn process_request_body(&mut self, _: RequestId) {}

    /// Intentionally empty: this listener does nothing when the request reaches EOF.
    fn process_request_eof(&mut self, _: RequestId) {}

    fn process_response(&mut self, _: RequestId, meta_result: Result) {
        let (metadata, error) = match meta_result {
            Ok(meta) => (
                Some(match meta {
                    FetchMetadata::Unfiltered(m) => m,
                    FetchMetadata::Filtered { unsafe_, .. } => unsafe_,
                }),
                None,
            ),
            Err(error) => (
                // Check variant without moving
                match &error {
                    NetworkError::SslValidation(..) |
                    NetworkError::Internal(..) |
                    NetworkError::Crash(..) => {
                        let mut meta = Metadata::default(self.url.clone());
                        let mime: Option = "text/html".parse().ok();
                        meta.set_content_type(mime.as_ref());
                        Some(meta)
                    },
                    _ => None,
                },
                Some(error),
            ),
        };
        let content_type: Option = metadata
            .clone()
            .and_then(|meta| meta.content_type)
            .map(Serde::into_inner)
            .map(Into::into);

        let (policy_container, endpoints_list) = match metadata.as_ref() {
            None => (PolicyContainer::default(), None),
            Some(metadata) => (
                Self::create_policy_container_from_fetch_response(metadata),
                ReportingEndpoint::parse_reporting_endpoints_header(
                    &self.url.clone(),
                    &metadata.headers,
                ),
            ),
        };

        let parser = match ScriptThread::page_headers_available(&self.id, metadata, CanGc::note()) {
            Some(parser) => parser,
            None => return,
        };
        if parser.aborted.get() {
            return;
        }

        let _realm = enter_realm(&*parser.document);

        // From Step 23.8.3 of https://html.spec.whatwg.org/multipage/#navigate
        // Let finalSandboxFlags be the union of targetSnapshotParams's sandboxing flags and
        // policyContainer's CSP list's CSP-derived sandboxing flags.
        // TODO: implement targetSnapshotParam's sandboxing flags
        let final_sandboxing_flag_set = policy_container
            .csp_list
            .as_ref()
            .and_then(|csp| csp.get_sandboxing_flag_set_for_document())
            .unwrap_or(SandboxingFlagSet::empty());

        if let Some(endpoints) = endpoints_list {
            parser.document.window().set_endpoints_list(endpoints);
        }
        self.parser = Some(Trusted::new(&*parser));
        self.navigation_params = NavigationParams {
            policy_container,
            content_type,
            final_sandboxing_flag_set,
            resource_header: vec![],
        };
        self.submit_resource_timing();

        // Part of https://html.spec.whatwg.org/multipage/#loading-a-document
        //
        // Step 3. If, given type, the new resource is to be handled by displaying some sort of inline content,
        // e.g., a native rendering of the content or an error message because the specified type is not supported,
        // then return the result of creating a document for inline content that doesn't have a DOM given
        // navigationParams's navigable, navigationParams's id, navigationParams's navigation timing type,
        // and navigationParams's user involvement.
        if let Some(error) = error {
            let page = match error {
                NetworkError::SslValidation(reason, bytes) => {
                    let page = resources::read_string(Resource::BadCertHTML);
                    let page = page.replace("${reason}", &reason);
                    let encoded_bytes = general_purpose::STANDARD_NO_PAD.encode(bytes);
                    let page = page.replace("${bytes}", encoded_bytes.as_str());
                    page.replace("${secret}", &net_traits::PRIVILEGED_SECRET.to_string())
                },
                NetworkError::Internal(reason) => {
                    let page = resources::read_string(Resource::NetErrorHTML);
                    page.replace("${reason}", &reason)
                },
                NetworkError::Crash(details) => {
                    let page = resources::read_string(Resource::CrashHTML);
                    page.replace("${details}", &details)
                },
                NetworkError::LoadCancelled => {
                    // The next load will show a page
                    return;
                },
            };
            self.load_inline_unknown_content(&parser, page);
        }
    }

    /// Consumes one chunk of the response body.
    ///
    /// Chunks are ignored for synthesized documents, when no parser has been
    /// stored yet, or when the parser was aborted. Until the document has been
    /// loaded the bytes are accumulated in the resource header used for MIME
    /// sniffing; afterwards they are handed straight to the parser.
    fn process_response_chunk(&mut self, _: RequestId, payload: Vec) {
        if self.is_synthesized_document {
            return;
        }
        let parser = match self.parser.as_ref() {
            Some(trusted) => trusted.root(),
            None => return,
        };
        if parser.aborted.get() {
            return;
        }
        if self.has_loaded_document {
            parser.parse_bytes_chunk(payload, CanGc::note());
            return;
        }
        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
        let resource_header = &mut self.navigation_params.resource_header;
        resource_header.extend_from_slice(&payload);
        // the number of bytes in buffer is greater than or equal to 1445.
        if resource_header.len() >= 1445 {
            self.load_document(CanGc::note());
        }
    }

    // This method is called via script_thread::handle_fetch_eof, so we must call
    // submit_resource_timing in this function
    // Resource listeners are called via net_traits::Action::process, which handles submission for them
    fn process_response_eof(
        &mut self,
        _: RequestId,
        status: Result,
    ) {
        let parser = match self.parser.as_ref() {
            Some(parser) => parser.root(),
            None => return,
        };
        if parser.aborted.get() {
            return;
        }

        match status {
            // are we throwing this away or can we use it?
            Ok(_) => (),
            // TODO(Savago): we should send a notification to callers #5463.
            Err(err) => debug!("Failed to load page URL {}, error: {:?}", self.url, err),
        }

        // https://mimesniff.spec.whatwg.org/#read-the-resource-header
        //
        // the end of the resource is reached.
        if !self.has_loaded_document {
            self.load_document(CanGc::note());
        }

        let _realm = enter_realm(&*parser);

        parser
            .document
            .set_redirect_count(self.resource_timing.redirect_count);

        parser.last_chunk_received.set(true);
        if !parser.suspended.get() {
            parser.parse_sync(CanGc::note());
        }

        // TODO: Only update if this is the current document resource.
        // TODO(mrobinson): Pass a proper fetch_start parameter here instead of `CrossProcessInstant::now()`.
        if let Some(pushed_index) = self.pushed_entry_index {
            let document = &parser.document;
            let performance_entry = PerformanceNavigationTiming::new(
                &document.global(),
                CrossProcessInstant::now(),
                document,
                CanGc::note(),
            );
            document
                .global()
                .performance()
                .update_entry(pushed_index, performance_entry.upcast::());
        }
    }

    /// Returns a mutable reference to the fetch timing data stored in this context.
    fn resource_timing_mut(&mut self) -> &mut ResourceFetchTiming {
        &mut self.resource_timing
    }

    /// Returns a shared reference to the fetch timing data stored in this context.
    fn resource_timing(&self) -> &ResourceFetchTiming {
        &self.resource_timing
    }

    /// Stores a `PerformanceNavigationTiming` entry in the global scope's
    /// performance buffer and remembers the value returned by `queue_entry` in
    /// `pushed_entry_index`.
    ///
    /// Does nothing when no parser has been stored yet or the parser was aborted.
    fn submit_resource_timing(&mut self) {
        let Some(parser) = self.parser.as_ref().map(|trusted| trusted.root()) else {
            return;
        };
        if parser.aborted.get() {
            return;
        }

        let document = &parser.document;
        let global = document.global();

        // TODO: Pass a proper fetch start time here.
        let navigation_entry = PerformanceNavigationTiming::new(
            &global,
            CrossProcessInstant::now(),
            document,
            CanGc::note(),
        );
        self.pushed_entry_index = global
            .performance()
            .queue_entry(navigation_entry.upcast::(), CanGc::note());
    }

    /// Reports `violations` through the global of the parser's document.
    ///
    /// Silently returns when no parser has been stored yet.
    fn process_csp_violations(&mut self, _request_id: RequestId, violations: Vec) {
        let Some(parser) = self.parser.as_ref().map(|trusted| trusted.root()) else {
            return;
        };
        // TODO(https://github.com/w3c/webappsec-csp/issues/687): Update after spec is resolved
        parser
            .document
            .global()
            .report_csp_violations(violations, None, None);
    }
}

// `ParserContext` overrides nothing from `PreInvoke`; the empty impl relies on
// whatever defaults the trait provides.
impl PreInvoke for ParserContext {}

/// Borrowed context describing where a fragment is being parsed.
pub(crate) struct FragmentContext<'a> {
    /// The context element of the fragment parse.
    pub(crate) context_elem: &'a Node,
    /// Optional form node associated with the fragment
    /// (presumably the form element pointer — verify against the callers).
    pub(crate) form_elem: Option<&'a Node>,
    /// Whether the context element allows scripting.
    pub(crate) context_element_allows_scripting: bool,
}

/// Inserts `child` into `parent` immediately before `reference_child`, or at
/// the end of `parent` when `reference_child` is `None`.
///
/// * For a node: outside of fragment parsing, inserting an element is wrapped
///   in a fresh custom element reaction queue that is popped right after the
///   insertion.
/// * For text: if the node located right before the insertion point is a
///   `Text` node, the characters are appended to it; otherwise a new `Text`
///   node is created and inserted.
///
/// # Panics
///
/// Panics if the underlying `InsertBefore` call fails.
#[cfg_attr(crown, allow(crown::unrooted_must_root))]
fn insert(
    parent: &Node,
    reference_child: Option<&Node>,
    child: NodeOrText>,
    parsing_algorithm: ParsingAlgorithm,
    custom_element_reaction_stack: &CustomElementReactionStack,
    can_gc: CanGc,
) {
    match child {
        NodeOrText::AppendNode(n) => {
            // https://html.spec.whatwg.org/multipage/#insert-a-foreign-element
            // applies if this is an element; if not, it may be
            // https://html.spec.whatwg.org/multipage/#insert-a-comment
            let element_in_non_fragment =
                parsing_algorithm != ParsingAlgorithm::Fragment && n.is::();
            if element_in_non_fragment {
                custom_element_reaction_stack.push_new_element_queue();
            }
            parent.InsertBefore(&n, reference_child, can_gc).unwrap();
            if element_in_non_fragment {
                custom_element_reaction_stack.pop_current_element_queue(can_gc);
            }
        },
        NodeOrText::AppendText(t) => {
            // https://html.spec.whatwg.org/multipage/#insert-a-character
            //
            // Only the node *immediately before* the insertion point may absorb
            // the characters. When a reference child is given, that node is its
            // previous sibling (possibly none); the parent's last child is the
            // right candidate only when appending at the end. Falling back to
            // the last child while a reference child exists would merge the
            // text into a node at the wrong position.
            let node_before_insertion_point = match reference_child {
                Some(reference) => reference.GetPreviousSibling(),
                None => parent.GetLastChild(),
            };
            let text = node_before_insertion_point
                .and_then(DomRoot::downcast::);

            if let Some(text) = text {
                text.upcast::().append_data(&t);
            } else {
                let text = Text::new(String::from(t).into(), &parent.owner_doc(), can_gc);
                parent
                    .InsertBefore(text.upcast(), reference_child, can_gc)
                    .unwrap();
            }
        },
    }
}

/// Tree sink through which the HTML tree builder creates and mutates DOM nodes.
#[derive(JSTraceable, MallocSizeOf)]
#[cfg_attr(crown, crown::unrooted_must_root_lint::must_root)]
pub(crate) struct Sink {
    /// Base URL kept alongside the document (its use is not visible in this part of the file).
    #[no_trace]
    base_url: ServoUrl,
    /// Document that created nodes belong to.
    document: Dom,
    /// Line number most recently reported via `set_current_line`; attached to
    /// elements created by the parser.
    current_line: Cell,
    /// Script slot; presumably the script element waiting to be executed — verify against its users.
    script: MutNullableDom,
    /// Parsing algorithm in effect, forwarded to element creation and insertion.
    parsing_algorithm: ParsingAlgorithm,
    /// Custom element reaction stack handed to element creation and insertion.
    #[conditional_malloc_size_of]
    custom_element_reaction_stack: Rc,
}

impl Sink {
    /// Tells whether the two element nodes live in the same home subtree.
    ///
    /// Panics with "Element node expected" if either downcast fails.
    fn same_tree(&self, x: &Dom, y: &Dom) -> bool {
        let first = x.downcast::().expect("Element node expected");
        let second = y.downcast::().expect("Element node expected");
        first.is_in_same_home_subtree(second)
    }

    /// Tells whether `node` currently has a parent node.
    fn has_parent_node(&self, node: &Dom) -> bool {
        let parent = node.GetParentNode();
        parent.is_some()
    }
}

impl TreeSink for Sink {
    /// Finishing the sink yields the sink itself.
    type Output = Self;
    /// Returns `self` unchanged.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn finish(self) -> Self {
        self
    }

    /// Node handle type exchanged between the tree builder and this sink.
    type Handle = Dom;
    /// Name type returned by `elem_name`.
    type ElemName<'a>
        = ExpandedName<'a>
    where
        Self: 'a;

    /// Returns a handle to the sink's document, upcast to the handle type.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn get_document(&self) -> Dom {
        Dom::from_ref(self.document.upcast())
    }

    /// Returns a handle to the contents of the template element `target`.
    ///
    /// Panics if the downcast of `target` fails.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn get_template_contents(&self, target: &Dom) -> Dom {
        let contents = target
            .downcast::()
            .expect("tried to get template contents of non-HTMLTemplateElement in HTML parsing")
            .Content(CanGc::note());
        Dom::from_ref(contents.upcast())
    }

    /// Compares the two handles with `==`.
    fn same_node(&self, x: &Dom, y: &Dom) -> bool {
        x == y
    }

    /// Returns the expanded name (namespace and local name) of the element `target`.
    ///
    /// Panics if the downcast of `target` fails.
    fn elem_name<'a>(&self, target: &'a Dom) -> ExpandedName<'a> {
        let element = target
            .downcast::()
            .expect("tried to get name of non-Element in HTML parsing");
        let ns = element.namespace();
        let local = element.local_name();
        ExpandedName { ns, local }
    }

    /// Creates an element for the tree builder and returns a handle to it.
    ///
    /// The tokenizer attributes are converted to `ElementAttribute`s, and the
    /// element is tagged as parser-created at the current line. When
    /// `flags.template` is set the fragment parsing algorithm is used instead of
    /// the sink's own algorithm.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn create_element(
        &self,
        name: QualName,
        attrs: Vec,
        flags: ElementFlags,
    ) -> Dom {
        let creator = ElementCreator::ParserCreated(self.current_line.get());
        let algorithm = if flags.template {
            ParsingAlgorithm::Fragment
        } else {
            self.parsing_algorithm
        };
        let element_attrs = attrs
            .into_iter()
            .map(|attr| {
                let value = DOMString::from(String::from(attr.value));
                ElementAttribute::new(attr.name, value)
            })
            .collect();
        let element = create_element_for_token(
            name,
            element_attrs,
            &self.document,
            creator,
            algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
        Dom::from_ref(element.upcast())
    }

    /// Creates a comment node holding `text` in the sink's document and returns a handle to it.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn create_comment(&self, text: StrTendril) -> Dom {
        let data = DOMString::from(String::from(text));
        let comment = Comment::new(data, &self.document, None, CanGc::note());
        Dom::from_ref(comment.upcast())
    }

    /// Creates a processing instruction with the given `target` and `data` in the
    /// sink's document and returns a handle to it.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Dom {
        let pi_target = DOMString::from(String::from(target));
        let pi_data = DOMString::from(String::from(data));
        let pi = ProcessingInstruction::new(pi_target, pi_data, &self.document, CanGc::note());
        Dom::from_ref(pi.upcast())
    }

    /// Sets `form` as the form owner of `target` when `target` is a form control
    /// and the relevant node is in the same tree as `form`.
    ///
    /// `nodes` is `(element, prev_element)`: `prev_element` is used for the
    /// same-tree check only when it is present and `element` has no parent;
    /// otherwise `element` is used.
    ///
    /// Panics with "Owner must be a form element" if the downcast of `form` fails.
    fn associate_with_form(
        &self,
        target: &Dom,
        form: &Dom,
        nodes: (&Dom, Option<&Dom>),
    ) {
        let (element, prev_element) = nodes;
        let tree_node = match prev_element {
            Some(prev) if !self.has_parent_node(element) => prev,
            _ => element,
        };
        if !self.same_tree(tree_node, form) {
            return;
        }

        let form = DomRoot::downcast::(DomRoot::from_ref(&**form))
            .expect("Owner must be a form element");

        let control = target
            .downcast::()
            .and_then(|element| element.as_maybe_form_control());
        if let Some(control) = control {
            control.set_form_owner_from_parser(&form, CanGc::note());
        }
    }

    /// Inserts `new_node` into the parent of `sibling`, directly before `sibling`.
    ///
    /// Panics if `sibling` has no parent node.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append_before_sibling(&self, sibling: &Dom, new_node: NodeOrText>) {
        let parent = sibling
            .GetParentNode()
            .expect("append_before_sibling called on node without parent");

        // `sibling` serves as the reference child for the insertion.
        insert(
            &parent,
            Some(sibling),
            new_node,
            self.parsing_algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
    }

    /// Logs the parse error at debug level; parsing itself is not affected here.
    fn parse_error(&self, msg: Cow<'static, str>) {
        debug!("Parse error: {}", msg);
    }

    /// Translates the tree builder's quirks mode into the style system's
    /// equivalent and stores it on the document.
    fn set_quirks_mode(&self, mode: QuirksMode) {
        self.document.set_quirks_mode(match mode {
            QuirksMode::NoQuirks => ServoQuirksMode::NoQuirks,
            QuirksMode::LimitedQuirks => ServoQuirksMode::LimitedQuirks,
            QuirksMode::Quirks => ServoQuirksMode::Quirks,
        });
    }

    /// Appends `child` at the end of `parent`.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append(&self, parent: &Dom, child: NodeOrText>) {
        // No reference child: the insertion happens after the last child.
        insert(
            parent,
            None,
            child,
            self.parsing_algorithm,
            &self.custom_element_reaction_stack,
            CanGc::note(),
        );
    }

    /// Inserts `child` before `elem` when `elem` has a parent; otherwise appends
    /// `child` to `prev_elem`.
    #[cfg_attr(crown, allow(crown::unrooted_must_root))]
    fn append_based_on_parent_node(
        &self,
        elem: &Dom,
        prev_elem: &Dom,
        child: NodeOrText>,
    ) {
        if self.has_parent_node(elem) {
            self.append_before_sibling(elem, child);
        } else {
            self.append(prev_elem, child);
        }
    }

    /// Creates a doctype node from `name`, `public_id` and `system_id` and
    /// appends it to the sink's document.
    ///
    /// Both identifiers are always passed as `Some(..)`, even when empty.
    /// Panics with "Appending failed" if the node cannot be appended.
    fn append_doctype_to_document(
        &self,
        name: StrTendril,
        public_id: StrTendril,
        system_id: StrTendril,
    ) {
        let document = &*self.document;
        let name = DOMString::from(String::from(name));
        let public_id = Some(DOMString::from(String::from(public_id)));
        let system_id = Some(DOMString::from(String::from(system_id)));
        let doctype = DocumentType::new(name, public_id, system_id, document, CanGc::note());
        document
            .upcast::()
            .AppendChild(doctype.upcast(), CanGc::note())
            .expect("Appending failed");
    }

    fn add_attrs_if_missing(&self, target: &Dom, attrs: Vec) {
        let elem = target
            .downcast::()
            .expect("tried to set attrs on non-Element in HTML parsing");
        for attr in attrs {
            elem.set_attribute_from_parser(
                attr.name,
                DOMString::from(String::from(attr.value)),
                None,
                CanGc::note(),
            );
        }
    }

    fn remove_from_parent(&self, target: &Dom) {
        if let Some(ref parent) = target.GetParentNode() {
            parent.RemoveChild(target, CanGc::note()).unwrap();
        }
    }

    /// Sets the "already started" flag on `node` when the downcast to a script
    /// element succeeds; other nodes are left untouched.
    fn mark_script_already_started(&self, node: &Dom) {
        if let Some(script_element) = node.downcast::() {
            script_element.set_already_started(true)
        }
    }

    /// Moves every child of `node` to the end of `new_parent`, in order.
    ///
    /// The first child is re-queried on each iteration and the loop stops once
    /// `node` reports no first child. Panics if appending fails.
    fn reparent_children(&self, node: &Dom, new_parent: &Dom) {
        while let Some(first_child) = node.GetFirstChild() {
            new_parent
                .AppendChild(&first_child, CanGc::note())
                .unwrap();
        }
    }

    /// <https://html.spec.whatwg.org/multipage/#html-integration-point>
    /// Specifically, the `annotation-xml` cases.
    ///
    /// Returns `true` when the element has an `encoding` attribute (in no
    /// namespace) whose value matches `text/html` or `application/xhtml+xml`,
    /// ASCII case-insensitively. Panics if the downcast of `handle` fails.
    fn is_mathml_annotation_xml_integration_point(&self, handle: &Dom) -> bool {
        let element = handle.downcast::().unwrap();
        let encoding = element.get_attribute(&ns!(), &local_name!("encoding"));
        encoding.is_some_and(|attr| {
            let value = attr.value();
            value.eq_ignore_ascii_case("text/html") ||
                value.eq_ignore_ascii_case("application/xhtml+xml")
        })
    }

    /// Records the line number that `create_element` attaches to parser-created elements.
    fn set_current_line(&self, line_number: u64) {
        self.current_line.set(line_number);
    }

    /// Roots `node` and invokes `pop` on the vtable obtained through `vtable_for`.
    fn pop(&self, node: &Dom) {
        let node = DomRoot::from_ref(&**node);
        vtable_for(&node).pop();
    }

    /// Reports whether the owner document of `intended_parent` allows declarative shadow roots.
    fn allow_declarative_shadow_roots(&self, intended_parent: &Dom) -> bool {
        intended_parent.owner_doc().allow_declarative_shadow_roots()
    }

    /// <https://html.spec.whatwg.org/multipage/#parsing-main-inhead>
    /// A start tag whose tag name is "template"
    /// Attach shadow path
    ///
    /// Delegates to `attach_declarative_shadow_inner` and returns its result.
    fn attach_declarative_shadow(
        &self,
        host: &Dom,
        template: &Dom,
        attributes: &[Attribute],
    ) -> bool {
        attach_declarative_shadow_inner(host, template, attributes)
    }
}

/// <https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token>
///
/// Creates the element described by `name` and `attrs` in `document`.
///
/// When a custom element definition is found and the parsing algorithm is not
/// the fragment one, the element is created synchronously inside its own
/// custom element reaction queue, with the document's
/// throw-on-dynamic-markup-insertion counter incremented for the duration.
/// Attributes are applied after creation; for input elements, value
/// sanitization is disabled while they are being applied.
fn create_element_for_token(
    name: QualName,
    attrs: Vec,
    document: &Document,
    creator: ElementCreator,
    parsing_algorithm: ParsingAlgorithm,
    custom_element_reaction_stack: &CustomElementReactionStack,
    can_gc: CanGc,
) -> DomRoot {
    // Step 3.
    // Look for an `is` attribute (its name is compared ASCII case-insensitively).
    let is = attrs
        .iter()
        .find(|attr| attr.name.local.eq_str_ignore_ascii_case("is"))
        .map(|attr| LocalName::from(attr.value.str()));

    // Step 4.
    let definition = document.lookup_custom_element_definition(&name.ns, &name.local, is.as_ref());

    // Step 5.
    let will_execute_script =
        definition.is_some() && parsing_algorithm != ParsingAlgorithm::Fragment;

    // Step 6.
    if will_execute_script {
        // Step 6.1.
        document.increment_throw_on_dynamic_markup_insertion_counter();
        // Step 6.2
        if is_execution_stack_empty() {
            document
                .window()
                .as_global_scope()
                .perform_a_microtask_checkpoint(can_gc);
        }
        // Step 6.3
        custom_element_reaction_stack.push_new_element_queue()
    }

    // Step 7.
    let creation_mode = if will_execute_script {
        CustomElementCreationMode::Synchronous
    } else {
        CustomElementCreationMode::Asynchronous
    };

    let element = Element::create(name, is, document, creator, creation_mode, None, can_gc);

    // https://html.spec.whatwg.org/multipage#the-input-element:value-sanitization-algorithm-3
    // says to invoke sanitization "when an input element is first created";
    // however, since sanitization requires content attributes to function,
    // it can't mean that literally.
    // Indeed, to make sanitization work correctly, we need to _not_ sanitize
    // until after all content attributes have been added

    let maybe_input = element.downcast::();
    if let Some(input) = maybe_input {
        input.disable_sanitization();
    }

    // Step 8
    for attr in attrs {
        element.set_attribute_from_parser(attr.name, attr.value, None, can_gc);
    }

    // _now_ we can sanitize (and we sanitize now even if the "value"
    // attribute isn't present!)
    if let Some(input) = maybe_input {
        input.enable_sanitization();
    }

    // Step 9.
    // Undo the bookkeeping of Step 6, in reverse order.
    if will_execute_script {
        // Steps 9.1 - 9.2.
        custom_element_reaction_stack.pop_current_element_queue(can_gc);
        // Step 9.3.
        document.decrement_throw_on_dynamic_markup_insertion_counter();
    }

    // TODO: Step 10.
    // TODO: Step 11.

    // Step 12 is handled in `associate_with_form`.

    // Step 13.
    element
}

/// Incremental decoder turning network byte chunks into `StrTendril`s.
#[derive(JSTraceable, MallocSizeOf)]
struct NetworkDecoder {
    /// Underlying lossy decoder; its sink's `output` is drained after each `decode` call.
    #[ignore_malloc_size_of = "Defined in tendril"]
    #[custom_trace]
    decoder: LossyDecoder,
}

impl NetworkDecoder {
    /// Builds a decoder for `encoding` that writes into a default-constructed sink.
    fn new(encoding: &'static Encoding) -> Self {
        let decoder = LossyDecoder::new_encoding_rs(encoding, Default::default());
        Self { decoder }
    }

    /// Feeds `chunk` to the decoder and returns the text accumulated in the
    /// sink, leaving the sink's output empty for the next call.
    fn decode(&mut self, chunk: Vec) -> StrTendril {
        let bytes = ByteTendril::from(&*chunk);
        self.decoder.process(bytes);
        let sink = self.decoder.inner_sink_mut();
        std::mem::take(&mut sink.output)
    }

    /// Consumes the decoder and returns whatever `finish` on the underlying decoder yields.
    fn finish(self) -> StrTendril {
        self.decoder.finish()
    }
}

/// Sink that accumulates decoded text in a single tendril.
#[derive(Default, JSTraceable)]
struct NetworkSink {
    /// Text received so far through `process`.
    #[no_trace]
    output: StrTendril,
}

impl TendrilSink for NetworkSink {
    type Output = StrTendril;

    fn process(&mut self, t: StrTendril) {
        if self.output.is_empty() {
            self.output = t;
        } else {
            self.output.push_tendril(&t);
        }
    }

    fn error(&mut self, _desc: Cow<'static, str>) {}

    fn finish(self) -> Self::Output {
        self.output
    }
}

fn attach_declarative_shadow_inner(host: &Node, template: &Node, attributes: &[Attribute]) -> bool {
    let host_element = host.downcast::().unwrap();

    if host_element.shadow_root().is_some() {
        return false;
    }

    let template_element = template.downcast::().unwrap();

    // Step 3. Let mode be template start tag's shadowrootmode attribute's value.
    // Step 4. Let clonable be true if template start tag has a shadowrootclonable attribute; otherwise false.
    // Step 5. Let delegatesfocus be true if template start tag
    // has a shadowrootdelegatesfocus attribute; otherwise false.
    // Step 6. Let serializable be true if template start tag
    // has a shadowrootserializable attribute; otherwise false.
    let mut shadow_root_mode = ShadowRootMode::Open;
    let mut clonable = false;
    let mut delegatesfocus = false;
    let mut serializable = false;

    let attributes: Vec = attributes
        .iter()
        .map(|attr| {
            ElementAttribute::new(
                attr.name.clone(),
                DOMString::from(String::from(attr.value.clone())),
            )
        })
        .collect();

    attributes
        .iter()
        .for_each(|attr: &ElementAttribute| match attr.name.local {
            local_name!("shadowrootmode") => {
                if attr.value.str().eq_ignore_ascii_case("open") {
                    shadow_root_mode = ShadowRootMode::Open;
                } else if attr.value.str().eq_ignore_ascii_case("closed") {
                    shadow_root_mode = ShadowRootMode::Closed;
                } else {
                    unreachable!("shadowrootmode value is not open nor closed");
                }
            },
            local_name!("shadowrootclonable") => {
                clonable = true;
            },
            local_name!("shadowrootdelegatesfocus") => {
                delegatesfocus = true;
            },
            local_name!("shadowrootserializable") => {
                serializable = true;
            },
            _ => {},
        });

    // Step 8.1. Attach a shadow root with declarative shadow host element,
    // mode, clonable, serializable, delegatesFocus, and "named".
    match host_element.attach_shadow(
        IsUserAgentWidget::No,
        shadow_root_mode,
        clonable,
        serializable,
        delegatesfocus,
        SlotAssignmentMode::Named,
        CanGc::note(),
    ) {
        Ok(shadow_root) => {
            // Step 8.3. Set shadow's declarative to true.
            shadow_root.set_declarative(true);

            // Set 8.4. Set template's template contents property to shadow.
            let shadow = shadow_root.upcast::();
            template_element.set_contents(Some(shadow));

            // Step 8.5. Set shadow’s available to element internals to true.
            shadow_root.set_available_to_element_internals(true);

            true
        },
        Err(_) => false,
    }
}