Prefetch img and scripts during parsing

Alan Jeffrey 2019-09-05 11:57:21 -05:00
parent 5bcb1b579c
commit 1aeb97b281
8 changed files with 351 additions and 45 deletions


@@ -62,6 +62,7 @@ use tendril::stream::LossyDecoder;
 mod async_html;
 mod html;
+mod prefetch;
 mod xml;
 
 #[dom_struct]
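The new `prefetch` module itself is outside this excerpt. As a rough sketch of the technique (not the actual contents of `prefetch.rs`, and with hypothetical names such as `PrefetchSink` and `discovered`), a quick-and-dirty resource scan can be written as an html5ever `TokenSink` that watches start tags for `img` and `script` sources:

// Sketch only: a TokenSink that records <img src> and <script src> URLs.
// The real prefetch tokenizer in this commit presumably issues fetch
// requests instead of collecting strings; this only shows the shape.
use html5ever::tokenizer::{Tag, TagKind, Token, TokenSink, TokenSinkResult};

struct PrefetchSink {
    discovered: Vec<String>, // hypothetical: URLs found so far
}

impl TokenSink for PrefetchSink {
    type Handle = ();

    fn process_token(&mut self, token: Token, _line: u64) -> TokenSinkResult<()> {
        if let Token::TagToken(Tag {
            kind: TagKind::StartTag,
            ref name,
            ref attrs,
            ..
        }) = token
        {
            // Only <img> and <script> start tags are interesting here.
            if &**name == "img" || &**name == "script" {
                if let Some(src) = attrs.iter().find(|a| &*a.name.local == "src") {
                    self.discovered.push(src.value.to_string());
                }
            }
        }
        TokenSinkResult::Continue
    }
}

The point is only that eagerly tokenizing the raw input is enough to see the relevant attributes long before the full tree-building parser reaches them.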
@@ -101,6 +102,10 @@ pub struct ServoParser {
     aborted: Cell<bool>,
     /// <https://html.spec.whatwg.org/multipage/#script-created-parser>
     script_created_parser: bool,
+    /// We do a quick-and-dirty parse of the input looking for resources to prefetch
+    prefetch_tokenizer: DomRefCell<prefetch::Tokenizer>,
+    #[ignore_malloc_size_of = "Defined in html5ever"]
+    prefetch_input: DomRefCell<BufferQueue>,
 }
 
 #[derive(PartialEq)]
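Both new fields use interior mutability because the parser's methods take `&self`: `DomRefCell` is Servo's DOM-aware `RefCell`, and `BufferQueue` is html5ever's queue of pending input chunks. A minimal standalone sketch of that pairing, using a plain `RefCell` in place of `DomRefCell` and the hypothetical `PrefetchSink` from the sketch above:

use std::cell::RefCell;

use html5ever::buffer_queue::BufferQueue;
use html5ever::tokenizer::{Tokenizer, TokenizerOpts};

// Stand-ins for the two new ServoParser fields: an eagerly-driven tokenizer
// plus the queue of chunks it consumes (names here are illustrative only).
struct PrefetchState {
    tokenizer: RefCell<Tokenizer<PrefetchSink>>,
    input: RefCell<BufferQueue>,
}

impl PrefetchState {
    fn new() -> Self {
        PrefetchState {
            tokenizer: RefCell::new(Tokenizer::new(
                PrefetchSink { discovered: Vec::new() },
                TokenizerOpts::default(),
            )),
            input: RefCell::new(BufferQueue::new()),
        }
    }
}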
@@ -403,6 +408,8 @@ impl ServoParser {
             script_nesting_level: Default::default(),
             aborted: Default::default(),
             script_created_parser: kind == ParserKind::ScriptCreated,
+            prefetch_tokenizer: DomRefCell::new(prefetch::Tokenizer::new(document)),
+            prefetch_input: DomRefCell::new(BufferQueue::new()),
         }
     }
@@ -425,20 +432,50 @@ impl ServoParser {
         )
     }
 
+    fn push_tendril_input_chunk(&self, chunk: StrTendril) {
+        if !chunk.is_empty() {
+            // Per https://github.com/whatwg/html/issues/1495
+            // stylesheets should not be loaded for documents
+            // without browsing contexts.
+            // https://github.com/whatwg/html/issues/1495#issuecomment-230334047
+            // suggests that no content should be preloaded in such a case.
+            // We're conservative, and only prefetch for documents
+            // with browsing contexts.
+            if self.document.browsing_context().is_some() {
+                // Push the chunk into the prefetch input stream,
+                // which is tokenized eagerly, to scan for resources
+                // to prefetch. If the user script uses `document.write()`
+                // to overwrite the network input, this prefetching may
+                // have been wasted, but in most cases it won't.
+                let mut prefetch_input = self.prefetch_input.borrow_mut();
+                prefetch_input.push_back(chunk.clone());
+                let _ = self.prefetch_tokenizer
+                    .borrow_mut()
+                    .feed(&mut *prefetch_input);
+            }
+            // Push the chunk into the network input stream,
+            // which is tokenized lazily.
+            self.network_input.borrow_mut().push_back(chunk);
+        }
+    }
+
     fn push_bytes_input_chunk(&self, chunk: Vec<u8>) {
+        // For byte input, we convert it to text using the network decoder.
         let chunk = self
             .network_decoder
             .borrow_mut()
             .as_mut()
             .unwrap()
             .decode(chunk);
-        if !chunk.is_empty() {
-            self.network_input.borrow_mut().push_back(chunk);
-        }
+        self.push_tendril_input_chunk(chunk);
     }
 
     fn push_string_input_chunk(&self, chunk: String) {
-        self.network_input.borrow_mut().push_back(chunk.into());
+        // Convert the chunk to a tendril so cloning it isn't expensive.
+        // The input has already been decoded as a string, so doesn't need
+        // to be decoded by the network decoder again.
+        let chunk = StrTendril::from(chunk);
+        self.push_tendril_input_chunk(chunk);
     }
 
     fn parse_sync(&self) {
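Putting the pieces together: each decoded chunk is cloned (a `StrTendril` clone shares its buffer rather than copying the text, which is why `push_string_input_chunk` converts the `String` up front), the clone is scanned immediately by the prefetch tokenizer, and the original waits in the lazily-consumed network queue. A sketch of that flow, reusing the hypothetical `PrefetchSink` from above and the html5ever API as it appears in this diff:

use html5ever::buffer_queue::BufferQueue;
use html5ever::tendril::StrTendril;
use html5ever::tokenizer::{Tokenizer, TokenizerOpts};

fn main() {
    let mut prefetch_tokenizer = Tokenizer::new(
        PrefetchSink { discovered: Vec::new() },
        TokenizerOpts::default(),
    );
    let mut prefetch_input = BufferQueue::new();
    let mut network_input = BufferQueue::new();

    // One network chunk, already decoded to text.
    let chunk = StrTendril::from("<img src=hero.png><script src=app.js></script>");

    // Eager side: scan the chunk for resources to prefetch right away.
    // Cloning a StrTendril shares the underlying buffer, so this is cheap.
    prefetch_input.push_back(chunk.clone());
    let _ = prefetch_tokenizer.feed(&mut prefetch_input);

    // Lazy side: the same chunk sits here until the real parser consumes it.
    network_input.push_back(chunk);

    // The sink has now seen both start tags, e.g. ["hero.png", "app.js"].
    println!("{:?}", prefetch_tokenizer.sink.discovered);
}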