Headers no longer have to be ASCII or UTF-8, though there is some disagreement with http::header::HeaderValue

This commit is contained in:
Patrick Shaughnessy 2019-12-20 20:04:41 -05:00
parent 3e77a0ae09
commit 0c08849d1c
6 changed files with 55 additions and 123 deletions

View file

@ -100,10 +100,20 @@ impl HeadersMethods for Headers {
combined_value.push(b',');
}
combined_value.extend(valid_value.iter().cloned());
self.header_list.borrow_mut().insert(
HeaderName::from_str(&valid_name).unwrap(),
HeaderValue::from_bytes(&combined_value).unwrap(),
);
match HeaderValue::from_bytes(&combined_value) {
Ok(value) => {
self.header_list
.borrow_mut()
.insert(HeaderName::from_str(&valid_name).unwrap(), value);
},
Err(_) => {
// can't add the header, but we don't need to panic the browser over it
warn!(
"Servo thinks \"{:?}\" is a valid HTTP header value but HeaderValue doesn't.",
combined_value
);
},
};
Ok(())
}
@ -197,7 +207,7 @@ impl Headers {
for (name, value) in h.header_list.borrow().iter() {
self.Append(
ByteString::new(Vec::from(name.as_str())),
ByteString::new(Vec::from(value.to_str().unwrap().as_bytes())),
ByteString::new(Vec::from(value.as_bytes())),
)?;
}
Ok(())
@ -267,13 +277,13 @@ impl Headers {
.map_or(vec![], |v| v.as_bytes().to_owned())
}
pub fn sort_header_list(&self) -> Vec<(String, String)> {
pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> {
let borrowed_header_list = self.header_list.borrow();
let headers_iter = borrowed_header_list.iter();
let mut header_vec = vec![];
for (name, value) in headers_iter {
let name = name.as_str().to_owned();
let value = value.to_str().unwrap().to_owned();
let value = value.as_bytes().to_vec();
let name_value = (name, value);
header_vec.push(name_value);
}
@ -293,7 +303,7 @@ impl Iterable for Headers {
fn get_value_at_index(&self, n: u32) -> ByteString {
let sorted_header_vec = self.sort_header_list();
let value = sorted_header_vec[n as usize].1.clone();
ByteString::new(value.into_bytes().to_vec())
ByteString::new(value)
}
fn get_key_at_index(&self, n: u32) -> ByteString {
@ -345,40 +355,19 @@ pub fn is_forbidden_header_name(name: &str) -> bool {
}
// There is some unresolved confusion over the definition of a name and a value.
// The fetch spec [1] defines a name as "a case-insensitive byte
// sequence that matches the field-name token production. The token
// productions are viewable in [2]." A field-name is defined as a
// token, which is defined in [3].
// ISSUE 1:
// It defines a value as "a byte sequence that matches the field-content token production."
// To note, there is a difference between field-content and
// field-value (which is made up of field-content and obs-fold). The
// current definition does not allow for obs-fold (which are white
// space and newlines) in values. So perhaps a value should be defined
// as "a byte sequence that matches the field-value token production."
// However, this would then allow values made up entirely of white space and newlines.
// RELATED ISSUE 2:
// According to a previously filed Errata ID: 4189 in [4], "the
// specified field-value rule does not allow single field-vchar
// surrounded by whitespace anywhere". They provided a fix for the
// field-content production, but ISSUE 1 has still not been resolved.
// The production definitions likely need to be re-written.
// [1] https://fetch.spec.whatwg.org/#concept-header-value
// [2] https://tools.ietf.org/html/rfc7230#section-3.2
// [3] https://tools.ietf.org/html/rfc7230#section-3.2.6
// [4] https://www.rfc-editor.org/errata_search.php?rfc=7230
//
// As of December 2019 WHATWG, isn't even using grammar productions for value;
// As of December 2019, WHATWG has no formal grammar production for value;
// https://fetch.spec.whatg.org/#concept-header-value just says not to have
// newlines, nulls, or leading/trailing whitespace.
// newlines, nulls, or leading/trailing whitespace. It even allows
// octets that aren't a valid UTF-8 encoding, and WPT tests reflect this.
// The HeaderValue class does not fully reflect this, so headers
// containing bytes with values 1..31 or 127 can't be created, failing
// WPT tests but probably not affecting anything important on the real Internet.
fn validate_name_and_value(name: ByteString, value: ByteString) -> Fallible<(String, Vec<u8>)> {
let valid_name = validate_name(name)?;
// this is probably out of date
if !is_field_content(&value) {
return Err(Error::Type("Value is not valid".to_string()));
if !is_legal_header_value(&value) {
return Err(Error::Type("Header value is not valid".to_string()));
}
Ok((valid_name, value.into()))
}
@ -431,47 +420,40 @@ fn is_field_name(name: &ByteString) -> bool {
is_token(&*name)
}
// https://tools.ietf.org/html/rfc7230#section-3.2
// http://www.rfc-editor.org/errata_search.php?rfc=7230
// Errata ID: 4189
// field-content = field-vchar [ 1*( SP / HTAB / field-vchar )
// field-vchar ]
fn is_field_content(value: &ByteString) -> bool {
// https://fetch.spec.whatg.org/#concept-header-value
fn is_legal_header_value(value: &ByteString) -> bool {
let value_len = value.len();
if value_len == 0 {
return false;
return true;
}
if !is_field_vchar(value[0]) {
return false;
}
if value_len > 2 {
for &ch in &value[1..value_len - 1] {
if !is_field_vchar(ch) && !is_space(ch) && !is_htab(ch) {
return false;
}
match value[0] {
b' ' | b'\t' => return false,
_ => {},
};
match value[value_len - 1] {
b' ' | b'\t' => return false,
_ => {},
};
for &ch in &value[..] {
match ch {
b'\0' | b'\n' | b'\r' => return false,
_ => {},
}
}
if !is_field_vchar(value[value_len - 1]) {
return false;
}
return true;
}
fn is_space(x: u8) -> bool {
x == b' '
}
fn is_htab(x: u8) -> bool {
x == b'\t'
}
// https://tools.ietf.org/html/rfc7230#section-3.2
fn is_field_vchar(x: u8) -> bool {
is_vchar(x) || is_obs_text(x)
true
// If accepting non-UTF8 header values causes breakage,
// removing the above "true" and uncommenting the below code
// would ameliorate it while still accepting most reasonable headers:
//match str::from_utf8(value) {
// Ok(_) => true,
// Err(_) => {
// warn!(
// "Rejecting spec-legal but non-UTF8 header value: {:?}",
// value
// );
// false
// },
// }
}
// https://tools.ietf.org/html/rfc5234#appendix-B.1