Use hosts-replaced URL only when loading resources

This commit is contained in:
James Graham 2015-06-18 13:51:59 +01:00
parent 56a9eab2a0
commit f52276d2cc
15 changed files with 162 additions and 132 deletions

View file

@ -5,8 +5,7 @@
use rustc_serialize::json::{decode};
use time;
use url::Url;
use net_traits::IncludeSubdomains;
use resource_task::{IPV4_REGEX, IPV6_REGEX};
use net_traits::{IncludeSubdomains, IPV4_REGEX, IPV6_REGEX};
use std::str::{from_utf8};

View file

@ -4,6 +4,7 @@
use net_traits::{ControlMsg, CookieSource, LoadData, Metadata, LoadConsumer, IncludeSubdomains};
use net_traits::ProgressMsg::{Payload, Done};
use net_traits::hosts::replace_hosts;
use devtools_traits::{ChromeToDevtoolsControlMsg, DevtoolsControlMsg, NetworkEvent};
use mime_classifier::MIMEClassifier;
use resource_task::{start_sending_opt, start_sending_sniffed_opt};
@ -95,7 +96,11 @@ fn load(mut load_data: LoadData,
// repository DOES exist, please update this constant to use it.
let max_redirects = 50;
let mut iters = 0;
let mut url = load_data.url.clone();
// URL of the document being loaded, as seen by all the higher-level code.
let mut doc_url = load_data.url.clone();
// URL that we actually fetch from the network, after applying the replacements
// specified in the hosts file.
let mut url = replace_hosts(&load_data.url);
let mut redirected_to = HashSet::new();
// If the URL is a view-source scheme then the scheme data contains the
@ -105,7 +110,8 @@ fn load(mut load_data: LoadData,
let viewing_source = url.scheme == "view-source";
if viewing_source {
let inner_url = load_data.url.non_relative_scheme_data().unwrap();
url = Url::parse(inner_url).unwrap();
doc_url = Url::parse(inner_url).unwrap();
url = replace_hosts(&doc_url);
match &*url.scheme {
"http" | "https" => {}
_ => {
@ -176,8 +182,11 @@ reason: \"certificate verify failed\" }]))";
}
};
// Preserve the `host` header set automatically by Request.
let host = req.headers().get::<Host>().unwrap().clone();
// Ensure that the `Host` header is set from the original (pre-hosts-replacement) URL.
let host = Host {
hostname: doc_url.serialize_host().unwrap(),
port: doc_url.port_or_default()
};
// Avoid automatically preserving request headers when redirects occur.
// See https://bugzilla.mozilla.org/show_bug.cgi?id=401564 and
@ -204,9 +213,9 @@ reason: \"certificate verify failed\" }]))";
}
let (tx, rx) = ipc::channel().unwrap();
resource_mgr_chan.send(ControlMsg::GetCookiesForUrl(url.clone(),
tx,
CookieSource::HTTP)).unwrap();
resource_mgr_chan.send(ControlMsg::GetCookiesForUrl(doc_url.clone(),
tx,
CookieSource::HTTP)).unwrap();
if let Some(cookie_list) = rx.recv().unwrap() {
let mut v = Vec::new();
v.push(cookie_list.into_bytes());
@ -291,7 +300,7 @@ reason: \"certificate verify failed\" }]))";
if let Some(cookies) = response.headers.get_raw("set-cookie") {
for cookie in cookies.iter() {
if let Ok(cookies) = String::from_utf8(cookie.clone()) {
resource_mgr_chan.send(ControlMsg::SetCookiesForUrl(url.clone(),
resource_mgr_chan.send(ControlMsg::SetCookiesForUrl(doc_url.clone(),
cookies,
CookieSource::HTTP)).unwrap();
}
@ -340,15 +349,16 @@ reason: \"certificate verify failed\" }]))";
}
_ => {}
}
let new_url = match UrlParser::new().base_url(&url).parse(&new_url) {
let new_doc_url = match UrlParser::new().base_url(&doc_url).parse(&new_url) {
Ok(u) => u,
Err(e) => {
send_error(url, e.to_string(), start_chan);
send_error(doc_url, e.to_string(), start_chan);
return;
}
};
info!("redirecting to {}", new_url);
url = new_url;
info!("redirecting to {}", new_doc_url);
url = replace_hosts(&new_doc_url);
doc_url = new_doc_url;
// According to https://tools.ietf.org/html/rfc7231#section-6.4.2,
// historically UAs have rewritten POST->GET on 301 and 302 responses.
@ -358,12 +368,12 @@ reason: \"certificate verify failed\" }]))";
load_data.method = Method::Get;
}
if redirected_to.contains(&url) {
send_error(url, "redirect loop".to_string(), start_chan);
if redirected_to.contains(&doc_url) {
send_error(doc_url, "redirect loop".to_string(), start_chan);
return;
}
redirected_to.insert(url.clone());
redirected_to.insert(doc_url.clone());
continue;
}
None => ()
@ -374,7 +384,7 @@ reason: \"certificate verify failed\" }]))";
if viewing_source {
adjusted_headers.set(ContentType(Mime(TopLevel::Text, SubLevel::Plain, vec![])));
}
let mut metadata: Metadata = Metadata::default(url);
let mut metadata: Metadata = Metadata::default(doc_url);
metadata.set_content_type(match adjusted_headers.get() {
Some(&ContentType(ref mime)) => Some(mime),
None => None

View file

@ -2,7 +2,6 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#![feature(box_raw)]
#![feature(box_syntax)]
#![feature(fnbox)]
#![feature(mpsc_select)]
@ -10,8 +9,6 @@
#![feature(plugin)]
#![feature(vec_push_all)]
#![plugin(regex_macros)]
extern crate net_traits;
extern crate cookie as cookie_rs;
extern crate devtools_traits;
@ -29,8 +26,6 @@ extern crate time;
extern crate url;
extern crate uuid;
extern crate regex;
pub mod about_loader;
pub mod file_loader;
pub mod http_loader;

View file

@ -26,47 +26,12 @@ use hyper::header::{ContentType, Header, SetCookie, UserAgent};
use hyper::mime::{Mime, TopLevel, SubLevel};
use ipc_channel::ipc::{self, IpcReceiver, IpcSender};
use regex::Regex;
use std::borrow::ToOwned;
use std::boxed::FnBox;
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::io::{BufReader, Read};
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::mpsc::{channel, Sender};
/// Process-wide table of host name -> address replacements.
///
/// Stays `None` until `global_init` stores a pointer to a parsed hosts file here.
// NOTE(review): this is a `static mut` holding a raw pointer; presumably it is
// written once at startup before any reader runs — confirm.
static mut HOST_TABLE: Option<*mut HashMap<String, String>> = None;
/// Matches a dotted-quad IPv4 address: four `.`-separated decimal octets, each in 0-255.
pub static IPV4_REGEX: Regex = regex!(
r"^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
);
/// Loose IPv6 matcher: one to eight groups of up to four hex digits, each optionally
/// followed by `:`, with an optional `/<1-3 digits>` suffix. It does not validate the
/// address beyond that shape.
pub static IPV6_REGEX: Regex = regex!(r"^([a-fA-F0-9]{0,4}[:]?){1,8}(/\d{1,3})?$");
/// Loads the hosts file named by the `HOST_FILE` environment variable and
/// publishes the parsed table through `HOST_TABLE`.
///
/// Returns without touching `HOST_TABLE` when the variable is unset or the
/// file cannot be opened or read.
pub fn global_init() {
    //TODO: handle bad file path
    let opened = env::var("HOST_FILE").ok().and_then(|host_file_path| File::open(&host_file_path).ok());
    let mut reader = match opened {
        Some(f) => BufReader::new(f),
        None => return,
    };

    let mut contents = String::new();
    if reader.read_to_string(&mut contents).is_err() {
        return;
    }

    // The boxed table is turned into a raw pointer and is not freed in this function.
    let table_ptr = Box::into_raw(parse_hostsfile(&contents));
    // NOTE(review): writes a `static mut`; presumably this runs once before any
    // code reads `HOST_TABLE` — confirm.
    unsafe {
        HOST_TABLE = Some(table_ptr);
    }
}
pub enum ProgressSender {
Channel(IpcSender<ProgressMsg>),
Listener(AsyncResponseTarget),
@ -187,38 +152,6 @@ pub fn new_resource_task(user_agent: Option<String>,
setup_chan
}
/// Parses the text of a hosts file into a map from host name to address.
///
/// Each line is trimmed and split on spaces and tabs. The first token must
/// match `IPV4_REGEX` or `IPV6_REGEX`; otherwise the whole line is ignored
/// (this also skips lines that start with `#`). Every following token is
/// recorded as a host name mapping to that address, up to the first token
/// that starts with `#`, which begins a trailing comment.
///
/// When a host name appears more than once, the last occurrence wins.
pub fn parse_hostsfile(hostsfile_content: &str) -> Box<HashMap<String, String>> {
    let mut host_table = HashMap::new();

    for line in hostsfile_content.lines() {
        // Runs of consecutive separators (e.g. columns aligned with several
        // spaces or tabs) make `split` yield empty tokens. Drop them so that
        // inspecting a token's first character can never index an empty string.
        let mut tokens = line.trim()
                             .split(|c: char| c == ' ' || c == '\t')
                             .filter(|token| !token.is_empty());

        let address = match tokens.next() {
            Some(address) => address,
            None => continue,
        };
        if !IPV4_REGEX.is_match(address) && !IPV6_REGEX.is_match(address) {
            continue;
        }

        for token in tokens {
            if token.starts_with("#") {
                break;
            }
            host_table.insert(token.to_owned(), address.to_owned());
        }
    }

    Box::new(host_table)
}
/// Rewrites the domain of `load_data.url` to the address that `host_table`
/// lists for it, and hands the (possibly modified) `load_data` back.
///
/// URLs without a domain, and domains that have no entry in the table, are
/// left unchanged.
///
/// `host_table` is dereferenced only when the URL has a domain.
// NOTE(review): the caller must pass a pointer to a live table; presumably the
// one stored by `global_init` — confirm.
pub fn replace_hosts(mut load_data: LoadData, host_table: *mut HashMap<String, String>) -> LoadData {
    if let Some(domain) = load_data.url.domain_mut() {
        let replacement = unsafe { (*host_table).get(domain).cloned() };
        if let Some(address) = replacement {
            *domain = address;
        }
    }
    load_data
}
struct ResourceChannelManager {
from_client: IpcReceiver<ControlMsg>,
resource_manager: ResourceManager
@ -296,12 +229,6 @@ impl ResourceManager {
}
fn load(&mut self, mut load_data: LoadData, consumer: LoadConsumer) {
unsafe {
if let Some(host_table) = HOST_TABLE {
load_data = replace_hosts(load_data, host_table);
}
}
self.user_agent.as_ref().map(|ua| {
load_data.preserved_headers.set(UserAgent(ua.clone()));
});