diff --git a/Cargo.lock b/Cargo.lock
index 5e179fb0755..9c5a29a7f4f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5408,6 +5408,7 @@ dependencies = [
  "mime",
  "mime_guess",
  "net_traits",
+ "nom",
  "pixels",
  "profile_traits",
  "rayon",
diff --git a/Cargo.toml b/Cargo.toml
index 17f9b05ab1e..d09c35cf96d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -99,6 +99,7 @@ mime_guess = "2.0.5"
 mozangle = "0.5.3"
 net_traits = { path = "components/shared/net" }
 nix = "0.29"
+nom = "7.1.3"
 num-traits = "0.2"
 num_cpus = "1.17.0"
 openxr = "0.19"
diff --git a/components/net/Cargo.toml b/components/net/Cargo.toml
index 6b41873a373..adbbf483850 100644
--- a/components/net/Cargo.toml
+++ b/components/net/Cargo.toml
@@ -52,6 +52,7 @@ malloc_size_of_derive = { workspace = true }
 mime = { workspace = true }
 mime_guess = { workspace = true }
 net_traits = { workspace = true }
+nom = { workspace = true }
 pixels = { path = "../pixels" }
 profile_traits = { workspace = true }
 rayon = { workspace = true }
diff --git a/components/net/cookie.rs b/components/net/cookie.rs
index 58780e4132e..2d44bdedaa9 100644
--- a/components/net/cookie.rs
+++ b/components/net/cookie.rs
@@ -12,8 +12,15 @@ use std::time::SystemTime;
 use cookie::Cookie;
 use net_traits::CookieSource;
 use net_traits::pub_domains::is_pub_domain;
+use nom::IResult;
+use nom::branch::alt;
+use nom::bytes::complete::{tag, tag_no_case, take, take_while_m_n};
+use nom::combinator::{all_consuming, opt, recognize};
+use nom::multi::{many0, many1, separated_list1};
+use nom::sequence::{delimited, preceded, terminated, tuple};
 use serde::{Deserialize, Serialize};
 use servo_url::ServoUrl;
+use time::{Date, Month, OffsetDateTime, Time};
 
 /// A stored cookie that wraps the definition in cookie-rs. This is used to implement
 /// various behaviours defined in the spec that rely on an associated request URL,
@@ -38,7 +45,28 @@ impl ServoCookie {
         request: &ServoUrl,
         source: CookieSource,
     ) -> Option<ServoCookie> {
-        let cookie = Cookie::parse(cookie_str).ok()?;
+        let mut cookie = Cookie::parse(cookie_str.clone()).ok()?;
+
+        // Cookie::parse uses RFC 2616 to parse
+        // cookie expiry date. If it fails to parse the expiry date, try to parse again with
+        // less strict algorithm from RFC6265.
+        // TODO: We can remove this code and the ServoCookie::parse_date function if cookie-rs
+        // library fixes this upstream.
+        if cookie.expires_datetime().is_none() {
+            let expiry_date = cookie_str
+                .split(';')
+                // The first segment is the cookie's name-value pair, not an attribute.
+                .skip(1)
+                .filter_map(|attribute| attribute.split_once('='))
+                .filter(|(key, _)| key.trim().eq_ignore_ascii_case("expires"))
+                // Unparseable values are ignored and the last valid attribute wins.
+                .filter_map(|(_, value)| Self::parse_date(value.trim()))
+                .last();
+            if let Some(date) = expiry_date {
+                cookie.set_expires(date);
+            }
+        }
+
         ServoCookie::new_wrapped(cookie, request, source)
     }
 
@@ -321,4 +349,239 @@ impl ServoCookie {
 
         true
     }
+
+    /// Parses a cookie expiry date with the lenient cookie-date algorithm of RFC 6265.
+    ///
+    /// Returns the date in UTC, or `None` if `string` is not a valid cookie-date.
+    ///
+    /// <https://datatracker.ietf.org/doc/html/rfc6265#section-5.1.1>
+    pub fn parse_date(string: &str) -> Option<OffsetDateTime> {
+        let string_in_bytes = string.as_bytes();
+
+        // Helper closures
+        let parse_ascii_u8 =
+            |bytes: &[u8]| -> Option<u8> { std::str::from_utf8(bytes).ok()?.parse::<u8>().ok() };
+        let parse_ascii_i32 =
+            |bytes: &[u8]| -> Option<i32> { std::str::from_utf8(bytes).ok()?.parse::<i32>().ok() };
+
+        // Step 1. Using the grammar below, divide the cookie-date into date-tokens.
+        // *OCTET
+        let any_octets = |input| Ok(("".as_bytes(), input));
+        // delimiter       = %x09 / %x20-2F / %x3B-40 / %x5B-60 / %x7B-7E
+        let delimiter: fn(&[u8]) -> IResult<&[u8], u8> = |input| {
+            let (input, bytes) = take(1usize)(input)?;
+            if matches!(bytes[0], 0x09 | 0x20..=0x2F | 0x3B..=0x40 | 0x5B..=0x60 | 0x7B..=0x7E) {
+                Ok((input, bytes[0]))
+            } else {
+                Err(nom::Err::Error(nom::error::Error::new(
+                    input,
+                    nom::error::ErrorKind::Verify,
+                )))
+            }
+        };
+        // non-delimiter   = %x00-08 / %x0A-1F / DIGIT / ":" / ALPHA / %x7F-FF
+        let non_delimiter: fn(&[u8]) -> IResult<&[u8], u8> = |input| {
+            let (input, bytes) = take(1usize)(input)?;
+            if matches!(bytes[0],
+                0x00..=0x08 | 0x0A..=0x1F | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'a'..=b'z' | 0x7F..=0xFF)
+            {
+                Ok((input, bytes[0]))
+            } else {
+                Err(nom::Err::Error(nom::error::Error::new(
+                    input,
+                    nom::error::ErrorKind::Verify,
+                )))
+            }
+        };
+        // non-digit       = %x00-2F / %x3A-FF
+        let non_digit: fn(&[u8]) -> IResult<&[u8], u8> = |input| {
+            let (input, bytes) = take(1usize)(input)?;
+            if matches!(bytes[0], 0x00..=0x2F | 0x3A..=0xFF) {
+                Ok((input, bytes[0]))
+            } else {
+                Err(nom::Err::Error(nom::error::Error::new(
+                    input,
+                    nom::error::ErrorKind::Verify,
+                )))
+            }
+        };
+        // time-field      = 1*2DIGIT
+        let time_field = |input| take_while_m_n(1, 2, |byte: u8| byte.is_ascii_digit())(input);
+        // hms-time        = time-field ":" time-field ":" time-field
+        let hms_time = |input| {
+            tuple((
+                time_field,
+                preceded(tag(":"), time_field),
+                preceded(tag(":"), time_field),
+            ))(input)
+        };
+        // The time, year and day-of-month productions describe a whole date-token, so they are
+        // wrapped in `all_consuming`: a token such as "2024" must not match day-of-month as "20".
+        // time            = hms-time [ non-digit *OCTET ]
+        let time = |input| {
+            all_consuming(terminated(
+                hms_time,
+                opt(tuple((non_digit, any_octets))),
+            ))(input)
+        };
+        // year            = 2*4DIGIT [ non-digit *OCTET ]
+        let year = |input| {
+            all_consuming(terminated(
+                take_while_m_n(2, 4, |byte: u8| byte.is_ascii_digit()),
+                opt(tuple((non_digit, any_octets))),
+            ))(input)
+        };
+        // month           = ( "jan" / "feb" / "mar" / "apr" /
+        //                     "may" / "jun" / "jul" / "aug" /
+        //                     "sep" / "oct" / "nov" / "dec" ) *OCTET
+        let month = |input| {
+            terminated(
+                alt((
+                    tag_no_case("jan"),
+                    tag_no_case("feb"),
+                    tag_no_case("mar"),
+                    tag_no_case("apr"),
+                    tag_no_case("may"),
+                    tag_no_case("jun"),
+                    tag_no_case("jul"),
+                    tag_no_case("aug"),
+                    tag_no_case("sep"),
+                    tag_no_case("oct"),
+                    tag_no_case("nov"),
+                    tag_no_case("dec"),
+                )),
+                any_octets,
+            )(input)
+        };
+        // day-of-month    = 1*2DIGIT [ non-digit *OCTET ]
+        let day_of_month = |input| {
+            all_consuming(terminated(
+                take_while_m_n(1, 2, |byte: u8| byte.is_ascii_digit()),
+                opt(tuple((non_digit, any_octets))),
+            ))(input)
+        };
+        // date-token      = 1*non-delimiter
+        let date_token = |input| recognize(many1(non_delimiter))(input);
+        // date-token-list = date-token *( 1*delimiter date-token )
+        let date_token_list = |input| separated_list1(many1(delimiter), date_token)(input);
+        // cookie-date     = *delimiter date-token-list *delimiter
+        let cookie_date =
+            |input| delimited(many0(delimiter), date_token_list, many0(delimiter))(input);
+
+        // Step 2. Process each date-token sequentially in the order the date-tokens appear in the cookie-date:
+        let mut time_value: Option<(u8, u8, u8)> = None; // Also represents found-time flag.
+        let mut day_of_month_value: Option<u8> = None; // Also represents found-day-of-month flag.
+        let mut month_value: Option<Month> = None; // Also represents found-month flag.
+        let mut year_value: Option<i32> = None; // Also represents found-year flag.
+
+        let (_, date_tokens) = cookie_date(string_in_bytes).ok()?;
+        for date_token in date_tokens {
+            // Step 2.1. If the found-time flag is not set and the token matches the time production,
+            if time_value.is_none() {
+                if let Ok((_, result)) = time(date_token) {
+                    // set the found-time flag and set the hour-value, minute-value, and
+                    // second-value to the numbers denoted by the digits in the date-token,
+                    // respectively.
+                    if let (Some(hour), Some(minute), Some(second)) = (
+                        parse_ascii_u8(result.0),
+                        parse_ascii_u8(result.1),
+                        parse_ascii_u8(result.2),
+                    ) {
+                        time_value = Some((hour, minute, second));
+                    }
+                    // Skip the remaining sub-steps and continue to the next date-token.
+                    continue;
+                }
+            }
+
+            // Step 2.2. If the found-day-of-month flag is not set and the date-token matches the
+            // day-of-month production,
+            if day_of_month_value.is_none() {
+                if let Ok((_, result)) = day_of_month(date_token) {
+                    // set the found-day-of-month flag and set the day-of-month-value to the number
+                    // denoted by the date-token.
+                    day_of_month_value = parse_ascii_u8(result);
+                    // Skip the remaining sub-steps and continue to the next date-token.
+                    continue;
+                }
+            }
+
+            // Step 2.3. If the found-month flag is not set and the date-token matches the month production,
+            if month_value.is_none() {
+                if let Ok((_, result)) = month(date_token) {
+                    // set the found-month flag and set the month-value to the month denoted by the date-token.
+                    month_value = match result.to_ascii_lowercase().as_slice() {
+                        b"jan" => Some(Month::January),
+                        b"feb" => Some(Month::February),
+                        b"mar" => Some(Month::March),
+                        b"apr" => Some(Month::April),
+                        b"may" => Some(Month::May),
+                        b"jun" => Some(Month::June),
+                        b"jul" => Some(Month::July),
+                        b"aug" => Some(Month::August),
+                        b"sep" => Some(Month::September),
+                        b"oct" => Some(Month::October),
+                        b"nov" => Some(Month::November),
+                        b"dec" => Some(Month::December),
+                        _ => None,
+                    };
+                    // Skip the remaining sub-steps and continue to the next date-token.
+                    continue;
+                }
+            }
+
+            // Step 2.4. If the found-year flag is not set and the date-token matches the year production,
+            if year_value.is_none() {
+                if let Ok((_, result)) = year(date_token) {
+                    // set the found-year flag and set the year-value to the number denoted by the date-token.
+                    year_value = parse_ascii_i32(result);
+                    // Skip the remaining sub-steps and continue to the next date-token.
+                    continue;
+                }
+            }
+        }
+
+        // Step 3. If the year-value is greater than or equal to 70 and less than or equal to 99,
+        // increment the year-value by 1900.
+        // Step 4. If the year-value is greater than or equal to 0 and less than or equal to 69,
+        // increment the year-value by 2000.
+        let year_value = year_value.map(|value| match value {
+            70..=99 => value + 1900,
+            0..=69 => value + 2000,
+            _ => value,
+        });
+
+        // Step 5. Abort these steps and fail to parse the cookie-date if:
+        // * at least one of the found-day-of-month, found-month, found-year, or found-time flags is not set,
+        let day_of_month_value = day_of_month_value?;
+        let month_value = month_value?;
+        let year_value = year_value?;
+        let (hour_value, minute_value, second_value) = time_value?;
+        // * the day-of-month-value is less than 1 or greater than 31,
+        if !(1..=31).contains(&day_of_month_value) {
+            return None;
+        }
+        // * the year-value is less than 1601,
+        if year_value < 1601 {
+            return None;
+        }
+        // * the hour-value is greater than 23,
+        // * the minute-value is greater than 59, or
+        // * the second-value is greater than 59.
+        if hour_value > 23 || minute_value > 59 || second_value > 59 {
+            return None;
+        }
+
+        // Step 6. Let the parsed-cookie-date be the date whose day-of-month, month, year, hour,
+        // minute, and second (in UTC) are the day-of-month-value, the month-value, the year-value,
+        // the hour-value, the minute-value, and the second-value, respectively. If no such date
+        // exists, abort these steps and fail to parse the cookie-date.
+        let parsed_cookie_date = OffsetDateTime::new_utc(
+            Date::from_calendar_date(year_value, month_value, day_of_month_value).ok()?,
+            Time::from_hms(hour_value, minute_value, second_value).ok()?,
+        );
+
+        // Step 7. Return the parsed-cookie-date as the result of this algorithm.
+        Some(parsed_cookie_date)
+    }
 }
diff --git a/components/net/tests/cookie.rs b/components/net/tests/cookie.rs
index cf8fcf4baf9..76b1760f2a7 100644
--- a/components/net/tests/cookie.rs
+++ b/components/net/tests/cookie.rs
@@ -6,6 +6,7 @@ use net::cookie::ServoCookie;
 use net::cookie_storage::CookieStorage;
 use net_traits::CookieSource;
 use servo_url::ServoUrl;
+use time::macros::datetime;
 
 #[test]
 fn test_domain_match() {
@@ -473,3 +474,36 @@ fn test_cookie_eviction_all_nonsecure_new_nonsecure() {
         "extra2=bar; extra3=bar; extra4=bar; extra5=bar; foo=bar"
     );
 }
+
+#[test]
+fn test_parse_date() {
+    assert_eq!(
+        ServoCookie::parse_date("26 Jun 2024 15:35:10 GMT"), // without day of week
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("26-Jun-2024 15:35:10 GMT"), // dashed
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("26 Jun 2024 15:35:10"), // no GMT
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("26 Jun 24 15:35:10 GMT"), // 2-digit year
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("26 jun 2024 15:35:10 gmt"), // Lowercase
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("Wed, 26 Jun 2024 15:35:10 GMT"), // consecutive delimiters
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(
+        ServoCookie::parse_date("Jun 2024 26 15:35:10 GMT"), // year before day of month
+        Some(datetime!(2024-06-26 15:35:10).assume_utc())
+    );
+    assert_eq!(ServoCookie::parse_date("26 Jun 2024"), None); // missing time
+}
diff --git a/components/script/Cargo.toml b/components/script/Cargo.toml
index 0774fa6dffd..553ecd0e7bb 100644
--- a/components/script/Cargo.toml
+++ b/components/script/Cargo.toml
@@ -92,7 +92,7 @@ metrics = { path = "../metrics" }
 mime = { workspace = true }
 mime_guess = { workspace = true }
 net_traits = { workspace = true }
-nom = "7.1.3"
+nom = { workspace = true }
 num-traits = { workspace = true }
 num_cpus = { workspace = true }
 parking_lot = { workspace = true }
diff --git a/tests/wpt/meta/xhr/cookies.http.html.ini b/tests/wpt/meta/xhr/cookies.http.html.ini
deleted file mode 100644
index b3ed182ef25..00000000000
--- a/tests/wpt/meta/xhr/cookies.http.html.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-[cookies.http.html]
-  [Basic non-cross-site cookie handling in XHR]
-    expected: FAIL
-