mirror of
https://github.com/servo/servo.git
synced 2025-06-06 16:45:39 +00:00
Devtools parser: reassemble fragmented messages (#36033)
* Devtools parser: reassemble fragmented messages Co-authored-by: Aria Edmonds <8436007+ar1a@users.noreply.github.com> Signed-off-by: Delan Azabani <dazabani@igalia.com> * Enable devtools parser tests on Linux only for now Signed-off-by: Delan Azabani <dazabani@igalia.com> --------- Signed-off-by: Delan Azabani <dazabani@igalia.com> Co-authored-by: Aria Edmonds <8436007+ar1a@users.noreply.github.com>
This commit is contained in:
parent
5d1c64dba9
commit
2c94110952
4 changed files with 147 additions and 25 deletions
|
@ -38,8 +38,8 @@
|
|||
# ./devtools_parser.py --use cap.pcap --port 1234 --filter watcher --range 10:30
|
||||
|
||||
import json
|
||||
import re
|
||||
import signal
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from subprocess import Popen, PIPE
|
||||
try:
|
||||
|
@ -49,7 +49,6 @@ except ImportError:
|
|||
return text
|
||||
|
||||
fields = ["frame.time", "tcp.srcport", "tcp.payload"]
|
||||
pattern = r"(\d+:){"
|
||||
|
||||
|
||||
# Use tshark to capture network traffic and save the result in a
|
||||
|
@ -98,40 +97,84 @@ def read_data(file):
|
|||
|
||||
|
||||
# Transform the raw output of wireshark into a more manageable one
|
||||
def process_data(out, port):
|
||||
out = [line.split("\t") for line in out.split("\n")]
|
||||
def process_data(input, port):
|
||||
# Split the input into lines.
|
||||
# `input` = newline-terminated lines of tab-delimited tshark(1) output
|
||||
lines = [line.split("\t") for line in input.split("\n")]
|
||||
|
||||
# Process fields
|
||||
data = []
|
||||
for line in out:
|
||||
# Remove empty lines and empty sends, and decode hex to bytes.
|
||||
# `lines` = [[date, port, hex-encoded data]], e.g.
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", "3133"]`
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", "393a"]`
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", "7b..."]`
|
||||
sends = []
|
||||
for line in lines:
|
||||
if len(line) != 3:
|
||||
continue
|
||||
|
||||
curr_time, curr_port, curr_data = line
|
||||
if len(curr_data) == 0:
|
||||
continue
|
||||
elif len(curr_data) % 2 == 1:
|
||||
print(f"[WARNING] Extra byte in hex-encoded data: {curr_data[-1]}", file=sys.stderr)
|
||||
curr_data = curr_data[:-1]
|
||||
if len(sends) > 0 and sends[-1][1] == curr_port:
|
||||
sends[-1][2] += bytearray.fromhex(curr_data)
|
||||
else:
|
||||
sends.append([curr_time, curr_port, bytearray.fromhex(curr_data)])
|
||||
|
||||
# Split and merge consecutive sends with the same port, to yield exactly one record per message.
|
||||
# Message records are of the form `length:{...}`, where `length` is an integer in ASCII decimal.
|
||||
# Incomplete messages are deferred until they are complete.
|
||||
# `sends` = [[date, port, record data]], e.g.
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", b"13"]`
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", b"9:"]`
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", b"{..."]`
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", b"...}"]`
|
||||
records = []
|
||||
scunge = {} # Map from port to incomplete message data
|
||||
for curr_time, curr_port, rest in sends:
|
||||
rest = scunge.pop(curr_port, b"") + rest
|
||||
while rest != b"":
|
||||
try:
|
||||
length, new_rest = rest.split(b":", 1) # Can raise ValueError
|
||||
length = int(length)
|
||||
if len(new_rest) < length:
|
||||
raise ValueError("Incomplete message (for now)")
|
||||
# If we found a `length:` prefix and we have enough data to satisfy it,
|
||||
# cut off the prefix so `rest` is just `{...}length:{...}length:{...}`.
|
||||
rest = new_rest
|
||||
except ValueError:
|
||||
print(f"[WARNING] Incomplete message detected (will try to reassemble): {repr(rest)}", file=sys.stderr)
|
||||
scunge[curr_port] = rest
|
||||
# Wait for more data from later sends, potentially after sends with the other port.
|
||||
break
|
||||
# Cut off the message so `rest` is just `length:{...}length:{...}`.
|
||||
message = rest[:length]
|
||||
rest = rest[length:]
|
||||
try:
|
||||
records.append([curr_time, curr_port, message.decode()])
|
||||
except UnicodeError as e:
|
||||
print(f"[WARNING] Failed to decode message as UTF-8: {e}")
|
||||
continue
|
||||
|
||||
# Process message records.
|
||||
# `records` = [[date, port, message text]], e.g.
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "6080", "{...}"]`
|
||||
result = []
|
||||
for line in records:
|
||||
if len(line) != 3:
|
||||
continue
|
||||
curr_time, curr_port, text = line
|
||||
# Time
|
||||
curr_time = curr_time.split(" ")[-2].split(".")[0]
|
||||
# Port
|
||||
curr_port = "Servo" if curr_port == port else "Firefox"
|
||||
# Data
|
||||
if not curr_data:
|
||||
continue
|
||||
try:
|
||||
dec = bytearray.fromhex(curr_data).decode()
|
||||
except UnicodeError:
|
||||
continue
|
||||
result.append([curr_time, curr_port, len(result), text])
|
||||
|
||||
indices = [m.span() for m in re.finditer(pattern, dec)]
|
||||
indices.append((len(dec), -1))
|
||||
prev = 0
|
||||
|
||||
for min, max in indices:
|
||||
if min > 0:
|
||||
text = dec[prev - 1:min]
|
||||
data.append([curr_time, curr_port, len(data), text])
|
||||
prev = max
|
||||
|
||||
return data
|
||||
# `result` = [[date, endpoint, index, message text]], e.g.
|
||||
# `["Mar 18, 2025 21:09:51.879661797 AWST", "Servo", 0, "{...}"]`
|
||||
return result
|
||||
|
||||
|
||||
# Pretty prints the json message
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue