Headers no longer have to be ASCII or UTF-8, though there is some disagreement with http::header::HeaderValue

This commit is contained in:
Patrick Shaughnessy 2019-12-20 20:04:41 -05:00
parent 3e77a0ae09
commit 0c08849d1c
6 changed files with 55 additions and 123 deletions

View file

@ -100,10 +100,20 @@ impl HeadersMethods for Headers {
combined_value.push(b','); combined_value.push(b',');
} }
combined_value.extend(valid_value.iter().cloned()); combined_value.extend(valid_value.iter().cloned());
self.header_list.borrow_mut().insert( match HeaderValue::from_bytes(&combined_value) {
HeaderName::from_str(&valid_name).unwrap(), Ok(value) => {
HeaderValue::from_bytes(&combined_value).unwrap(), self.header_list
); .borrow_mut()
.insert(HeaderName::from_str(&valid_name).unwrap(), value);
},
Err(_) => {
// can't add the header, but we don't need to panic the browser over it
warn!(
"Servo thinks \"{:?}\" is a valid HTTP header value but HeaderValue doesn't.",
combined_value
);
},
};
Ok(()) Ok(())
} }
@ -197,7 +207,7 @@ impl Headers {
for (name, value) in h.header_list.borrow().iter() { for (name, value) in h.header_list.borrow().iter() {
self.Append( self.Append(
ByteString::new(Vec::from(name.as_str())), ByteString::new(Vec::from(name.as_str())),
ByteString::new(Vec::from(value.to_str().unwrap().as_bytes())), ByteString::new(Vec::from(value.as_bytes())),
)?; )?;
} }
Ok(()) Ok(())
@ -267,13 +277,13 @@ impl Headers {
.map_or(vec![], |v| v.as_bytes().to_owned()) .map_or(vec![], |v| v.as_bytes().to_owned())
} }
pub fn sort_header_list(&self) -> Vec<(String, String)> { pub fn sort_header_list(&self) -> Vec<(String, Vec<u8>)> {
let borrowed_header_list = self.header_list.borrow(); let borrowed_header_list = self.header_list.borrow();
let headers_iter = borrowed_header_list.iter(); let headers_iter = borrowed_header_list.iter();
let mut header_vec = vec![]; let mut header_vec = vec![];
for (name, value) in headers_iter { for (name, value) in headers_iter {
let name = name.as_str().to_owned(); let name = name.as_str().to_owned();
let value = value.to_str().unwrap().to_owned(); let value = value.as_bytes().to_vec();
let name_value = (name, value); let name_value = (name, value);
header_vec.push(name_value); header_vec.push(name_value);
} }
@ -293,7 +303,7 @@ impl Iterable for Headers {
fn get_value_at_index(&self, n: u32) -> ByteString { fn get_value_at_index(&self, n: u32) -> ByteString {
let sorted_header_vec = self.sort_header_list(); let sorted_header_vec = self.sort_header_list();
let value = sorted_header_vec[n as usize].1.clone(); let value = sorted_header_vec[n as usize].1.clone();
ByteString::new(value.into_bytes().to_vec()) ByteString::new(value)
} }
fn get_key_at_index(&self, n: u32) -> ByteString { fn get_key_at_index(&self, n: u32) -> ByteString {
@ -345,40 +355,19 @@ pub fn is_forbidden_header_name(name: &str) -> bool {
} }
// There is some unresolved confusion over the definition of a name and a value. // There is some unresolved confusion over the definition of a name and a value.
// The fetch spec [1] defines a name as "a case-insensitive byte
// sequence that matches the field-name token production. The token
// productions are viewable in [2]." A field-name is defined as a
// token, which is defined in [3].
// ISSUE 1:
// It defines a value as "a byte sequence that matches the field-content token production."
// To note, there is a difference between field-content and
// field-value (which is made up of field-content and obs-fold). The
// current definition does not allow for obs-fold (which are white
// space and newlines) in values. So perhaps a value should be defined
// as "a byte sequence that matches the field-value token production."
// However, this would then allow values made up entirely of white space and newlines.
// RELATED ISSUE 2:
// According to a previously filed Errata ID: 4189 in [4], "the
// specified field-value rule does not allow single field-vchar
// surrounded by whitespace anywhere". They provided a fix for the
// field-content production, but ISSUE 1 has still not been resolved.
// The production definitions likely need to be re-written.
// [1] https://fetch.spec.whatwg.org/#concept-header-value
// [2] https://tools.ietf.org/html/rfc7230#section-3.2
// [3] https://tools.ietf.org/html/rfc7230#section-3.2.6
// [4] https://www.rfc-editor.org/errata_search.php?rfc=7230
// //
// As of December 2019 WHATWG, isn't even using grammar productions for value; // As of December 2019, WHATWG has no formal grammar production for value;
// https://fetch.spec.whatwg.org/#concept-header-value just says not to have // https://fetch.spec.whatwg.org/#concept-header-value just says not to have
// newlines, nulls, or leading/trailing whitespace. // newlines, nulls, or leading/trailing whitespace. It even allows
// octets that aren't a valid UTF-8 encoding, and WPT tests reflect this.
// The HeaderValue class does not fully reflect this, so headers
// containing bytes with values 1..31 or 127 can't be created, failing
// WPT tests but probably not affecting anything important on the real Internet.
fn validate_name_and_value(name: ByteString, value: ByteString) -> Fallible<(String, Vec<u8>)> { fn validate_name_and_value(name: ByteString, value: ByteString) -> Fallible<(String, Vec<u8>)> {
let valid_name = validate_name(name)?; let valid_name = validate_name(name)?;
if !is_legal_header_value(&value) {
// this is probably out of date return Err(Error::Type("Header value is not valid".to_string()));
if !is_field_content(&value) {
return Err(Error::Type("Value is not valid".to_string()));
} }
Ok((valid_name, value.into())) Ok((valid_name, value.into()))
} }
@ -431,47 +420,40 @@ fn is_field_name(name: &ByteString) -> bool {
is_token(&*name) is_token(&*name)
} }
// https://tools.ietf.org/html/rfc7230#section-3.2 // https://fetch.spec.whatwg.org/#concept-header-value
// http://www.rfc-editor.org/errata_search.php?rfc=7230 fn is_legal_header_value(value: &ByteString) -> bool {
// Errata ID: 4189
// field-content = field-vchar [ 1*( SP / HTAB / field-vchar )
// field-vchar ]
fn is_field_content(value: &ByteString) -> bool {
let value_len = value.len(); let value_len = value.len();
if value_len == 0 { if value_len == 0 {
return false; return true;
} }
if !is_field_vchar(value[0]) { match value[0] {
return false; b' ' | b'\t' => return false,
} _ => {},
};
if value_len > 2 { match value[value_len - 1] {
for &ch in &value[1..value_len - 1] { b' ' | b'\t' => return false,
if !is_field_vchar(ch) && !is_space(ch) && !is_htab(ch) { _ => {},
return false; };
} for &ch in &value[..] {
match ch {
b'\0' | b'\n' | b'\r' => return false,
_ => {},
} }
} }
true
if !is_field_vchar(value[value_len - 1]) { // If accepting non-UTF8 header values causes breakage,
return false; // removing the above "true" and uncommenting the below code
} // would ameliorate it while still accepting most reasonable headers:
//match str::from_utf8(value) {
return true; // Ok(_) => true,
} // Err(_) => {
// warn!(
fn is_space(x: u8) -> bool { // "Rejecting spec-legal but non-UTF8 header value: {:?}",
x == b' ' // value
} // );
// false
fn is_htab(x: u8) -> bool { // },
x == b'\t' // }
}
// https://tools.ietf.org/html/rfc7230#section-3.2
fn is_field_vchar(x: u8) -> bool {
is_vchar(x) || is_obs_text(x)
} }
// https://tools.ietf.org/html/rfc5234#appendix-B.1 // https://tools.ietf.org/html/rfc5234#appendix-B.1

View file

@ -1,23 +0,0 @@
[request-forbidden-headers.any.html]
type: testharness
[Accept-Encoding is a forbidden request header]
expected: FAIL
[Access-Control-Request-Headers is a forbidden request header]
expected: FAIL
[Access-Control-Request-Method is a forbidden request header]
expected: FAIL
[request-forbidden-headers.any.worker.html]
type: testharness
[Accept-Encoding is a forbidden request header]
expected: FAIL
[Access-Control-Request-Headers is a forbidden request header]
expected: FAIL
[Access-Control-Request-Method is a forbidden request header]
expected: FAIL

View file

@ -48,15 +48,6 @@
[fetch() with value %08] [fetch() with value %08]
expected: FAIL expected: FAIL
[fetch() with value %09]
expected: FAIL
[fetch() with value %0A]
expected: FAIL
[fetch() with value %0D]
expected: FAIL
[XMLHttpRequest with value %0E] [XMLHttpRequest with value %0E]
expected: FAIL expected: FAIL
@ -164,7 +155,3 @@
[fetch() with value %1F] [fetch() with value %1F]
expected: FAIL expected: FAIL
[fetch() with value %20]
expected: FAIL

View file

@ -1,10 +1,4 @@
[headers-no-cors.window.html] [headers-no-cors.window.html]
["no-cors" Headers object cannot have accept-language/\x01 as header]
expected: FAIL
["no-cors" Headers object cannot have content-language/\x01 as header]
expected: FAIL
["no-cors" Headers object cannot have content-type set to text/plain;ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss, text/plain] ["no-cors" Headers object cannot have content-type set to text/plain;ssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssss, text/plain]
expected: FAIL expected: FAIL

View file

@ -1,5 +0,0 @@
[request-headers.html]
type: testharness
[Testing empty Request Content-Type header]
expected: FAIL

View file

@ -3,9 +3,6 @@
[Read Response's body as readableStream] [Read Response's body as readableStream]
expected: FAIL expected: FAIL
[Testing empty Response Content-Type header]
expected: FAIL
[Testing null Response body] [Testing null Response body]
expected: FAIL expected: FAIL