EventSource: decode UTF-8 code points across network packets

This commit is contained in:
Simon Sapin 2017-05-22 02:25:18 +02:00
parent 6ac106ca76
commit 57438cffeb
4 changed files with 40 additions and 5 deletions

1
Cargo.lock generated
View file

@ -2373,6 +2373,7 @@ dependencies = [
"tinyfiledialogs 2.5.9 (registry+https://github.com/rust-lang/crates.io-index)", "tinyfiledialogs 2.5.9 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "url 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"utf-8 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
"uuid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "uuid 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"webrender_traits 0.39.0 (git+https://github.com/servo/webrender)", "webrender_traits 0.39.0 (git+https://github.com/servo/webrender)",
"webvr 0.0.1", "webvr 0.0.1",

View file

@ -89,6 +89,7 @@ swapper = "0.0.4"
time = "0.1.12" time = "0.1.12"
unicode-segmentation = "1.1.0" unicode-segmentation = "1.1.0"
url = {version = "1.2", features = ["heap_size", "query_encoding"]} url = {version = "1.2", features = ["heap_size", "query_encoding"]}
utf-8 = "0.7"
uuid = {version = "0.4", features = ["v4"]} uuid = {version = "0.4", features = ["v4"]}
xml5ever = {version = "0.7", features = ["unstable"]} xml5ever = {version = "0.7", features = ["unstable"]}
webrender_traits = {git = "https://github.com/servo/webrender", features = ["ipc"]} webrender_traits = {git = "https://github.com/servo/webrender", features = ["ipc"]}

View file

@ -16,8 +16,6 @@ use dom::eventtarget::EventTarget;
use dom::globalscope::GlobalScope; use dom::globalscope::GlobalScope;
use dom::messageevent::MessageEvent; use dom::messageevent::MessageEvent;
use dom_struct::dom_struct; use dom_struct::dom_struct;
use encoding::Encoding;
use encoding::all::UTF_8;
use euclid::length::Length; use euclid::length::Length;
use hyper::header::{Accept, qitem}; use hyper::header::{Accept, qitem};
use ipc_channel::ipc; use ipc_channel::ipc;
@ -39,6 +37,7 @@ use std::str::{Chars, FromStr};
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use task_source::TaskSource; use task_source::TaskSource;
use timers::OneshotTimerCallback; use timers::OneshotTimerCallback;
use utf8;
header! { (LastEventId, "Last-Event-ID") => [String] } header! { (LastEventId, "Last-Event-ID") => [String] }
@ -76,6 +75,8 @@ enum ParserState {
} }
struct EventSourceContext { struct EventSourceContext {
incomplete_utf8: Option<utf8::Incomplete>,
event_source: Trusted<EventSource>, event_source: Trusted<EventSource>,
gen_id: GenerationId, gen_id: GenerationId,
action_sender: ipc::IpcSender<FetchResponseMsg>, action_sender: ipc::IpcSender<FetchResponseMsg>,
@ -293,12 +294,41 @@ impl FetchResponseListener for EventSourceContext {
} }
// Handles one chunk of response bytes: decodes it as UTF-8 and passes the decoded text to
// `self.parse`, keeping any trailing partial byte sequence in `self.incomplete_utf8` for a
// later call.
//
// Reading note: this hunk appears to be rendered side by side. On rows that show two
// statements, the first looks like the pre-commit line and the second the post-commit line;
// rows with a single statement exist only after the commit.
fn process_response_chunk(&mut self, chunk: Vec<u8>) { fn process_response_chunk(&mut self, chunk: Vec<u8>) {
// Old side: each chunk was fed to a fresh `UTF_8.raw_decoder()` and the resulting `String`
// was parsed, with no decoder state kept between calls.
// New side: `input` is the not-yet-decoded tail of `chunk`. `take()` moves any pending
// partial sequence out of `self.incomplete_utf8`, leaving `None` in the field.
let mut stream = String::new(); let mut input = &*chunk;
UTF_8.raw_decoder().raw_feed(&chunk, &mut stream); if let Some(mut incomplete) = self.incomplete_utf8.take() {
self.parse(stream.chars()) match incomplete.try_complete(input) {
// NOTE(review): on this path the local `incomplete` is dropped and the field stays `None`.
// If `try_complete` returns `None` to mean "still incomplete, call again with more input"
// (confirm against the utf-8 crate docs), the buffered bytes are lost whenever a sequence
// is still unfinished after this chunk (e.g. spans three chunks, or the chunk is empty).
// Writing `incomplete` back into `self.incomplete_utf8` before returning would avoid that.
None => return,
Some((result, remaining_input)) => {
// A failed completion is replaced by a single U+FFFD before parsing continues.
self.parse(result.unwrap_or("\u{FFFD}").chars());
input = remaining_input;
}
}
}
// Decode whatever is left of this chunk, one maximal valid run at a time.
while !input.is_empty() {
match utf8::decode(&input) {
Ok(s) => {
// The whole remainder is valid: parse it and finish.
self.parse(s.chars());
return
}
Err(utf8::DecodeError::Invalid { valid_prefix, remaining_input, .. }) => {
// Parse the valid prefix, emit one U+FFFD for the invalid sequence, and resume
// decoding at `remaining_input`.
self.parse(valid_prefix.chars());
self.parse("\u{FFFD}".chars());
input = remaining_input;
}
Err(utf8::DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
// Input ended inside a sequence: parse the valid prefix and stash the suffix in
// `self.incomplete_utf8` so the next call can try to complete it.
self.parse(valid_prefix.chars());
self.incomplete_utf8 = Some(incomplete_suffix);
return
}
}
}
} }
// End-of-response handler. The `_response` result is ignored here. (The first and last rows of
// this hunk show the same text twice; that looks like the side-by-side rendering of unchanged
// lines.)
fn process_response_eof(&mut self, _response: Result<(), NetworkError>) { fn process_response_eof(&mut self, _response: Result<(), NetworkError>) {
// If a partial byte sequence was still pending when the response ended, clear it and feed one
// U+FFFD to the parser in its place.
// NOTE(review): the bound value is unused, so `self.incomplete_utf8.take().is_some()` would
// express the same check more idiomatically.
if let Some(_) = self.incomplete_utf8.take() {
self.parse("\u{FFFD}".chars());
}
// Runs on every call, whether or not a replacement character was emitted.
self.reestablish_the_connection(); self.reestablish_the_connection();
} }
} }
@ -378,6 +408,8 @@ impl EventSource {
// Step 14 // Step 14
let (action_sender, action_receiver) = ipc::channel().unwrap(); let (action_sender, action_receiver) = ipc::channel().unwrap();
let context = EventSourceContext { let context = EventSourceContext {
incomplete_utf8: None,
event_source: Trusted::new(&ev), event_source: Trusted::new(&ev),
gen_id: ev.generation_id.get(), gen_id: ev.generation_id.get(),
action_sender: action_sender.clone(), action_sender: action_sender.clone(),

View file

@ -102,6 +102,7 @@ extern crate time;
extern crate tinyfiledialogs; extern crate tinyfiledialogs;
extern crate unicode_segmentation; extern crate unicode_segmentation;
extern crate url; extern crate url;
extern crate utf8;
extern crate uuid; extern crate uuid;
extern crate webrender_traits; extern crate webrender_traits;
extern crate webvr_traits; extern crate webvr_traits;