Fix #6623 and update toml.py

This commit is contained in:
Bogdan Cuza 2015-07-15 22:43:08 +03:00
parent 3a5e4335d7
commit 75e14e80cd
2 changed files with 331 additions and 119 deletions

View file

@ -12,9 +12,10 @@ import fnmatch
import itertools import itertools
import re import re
import sys import sys
import toml
from licenseck import licenses from licenseck import licenses
filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py"] filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py", ".toml"]
reftest_directories = ["tests/ref"] reftest_directories = ["tests/ref"]
reftest_filetype = ".list" reftest_filetype = ".list"
python_dependencies = [ python_dependencies = [
@ -145,10 +146,21 @@ def check_flake8(file_paths):
return num_errors return num_errors
def check_toml(contents):
    """Report every line of a TOML document that contains an asterisk.

    contents -- the full text of the file, as a single string.

    Yields one ``(line_number, message)`` tuple per offending line, with
    line numbers starting at 1. Any ``*`` on the line triggers the report;
    the check does not look at where on the line it appears.
    """
    # enumerate(..., 1) gives 1-based line numbers directly.
    for line_number, line in enumerate(contents.splitlines(), 1):
        if "*" in line:
            yield (line_number, "found asterisk instead of minimum version number")
def collect_errors_for_files(files_to_check, checking_functions): def collect_errors_for_files(files_to_check, checking_functions):
for file_name in files_to_check: for file_name in files_to_check:
with open(file_name, "r") as fp: with open(file_name, "r") as fp:
contents = fp.read() contents = fp.read()
if file_name.endswith(".toml"):
for error in check_toml(contents):
yield (file_name, error[0], error[1])
else:
for check in checking_functions: for check in checking_functions:
for error in check(contents): for error in check(contents):
# filename, line, message # filename, line, message

View file

@ -1,5 +1,18 @@
import datetime, decimal, re import datetime, decimal, re
class TomlTz(datetime.tzinfo):
    """Fixed-offset time zone built from a TOML offset string.

    toml_offset -- either "Z" or a signed offset of the form "+HH:MM" /
    "-HH:MM". A malformed offset raises ValueError.
    """

    def __init__(self, toml_offset):
        # This must be __init__, not __new__: a __new__ that returns nothing
        # makes every TomlTz(...) call evaluate to None.
        if toml_offset == "Z":
            toml_offset = "+00:00"
        self._raw_offset = toml_offset
        # Keep the sign separate so it applies to hours and minutes alike;
        # otherwise "-05:30" would come out as -5h +30m.
        self._sign = -1 if toml_offset[:1] == '-' else 1
        self._hours = int(toml_offset[1:3])
        self._minutes = int(toml_offset[4:6])

    def tzname(self, dt):
        """Return the zone name, "UTC" followed by the raw offset."""
        return "UTC"+self._raw_offset

    def utcoffset(self, dt):
        """Return the signed offset from UTC as a timedelta."""
        return self._sign * datetime.timedelta(hours=self._hours,
                                               minutes=self._minutes)

    def dst(self, dt):
        """Return a zero timedelta: a fixed offset has no DST component."""
        return datetime.timedelta(0)
try: try:
_range = xrange _range = xrange
except NameError: except NameError:
@ -8,31 +21,31 @@ except NameError:
basestring = str basestring = str
unichr = chr unichr = chr
def load(f, _dict=dict):
    """Returns a dictionary containing the named file parsed as toml.

    f -- a filename, a list of filenames, or an object with a read() method.
    _dict -- the mapping class used for every table that is created.

    When f is a list, each file is parsed separately and the results are
    merged with toml_merge_dict. Raises Exception when f is none of the
    supported kinds, or when a list contains a non-string entry.
    """
    if isinstance(f, basestring):
        with open(f) as ffile:
            return loads(ffile.read(), _dict)
    elif isinstance(f, list):
        for l in f:
            if not isinstance(l, basestring):
                raise Exception("Load expects a list to contain filenames only")
        # The parsed documents are collected in a list; a mapping has no
        # append(). _dict is passed down so nested tables use it too.
        d = [load(l, _dict) for l in f]
        r = _dict()
        for l in d:
            toml_merge_dict(r, l)
        return r
    elif hasattr(f, 'read'):
        # hasattr, so an unsupported object reaches the error below instead
        # of raising AttributeError here.
        return loads(f.read(), _dict)
    else:
        raise Exception("You can only load a file descriptor, filename or list")
def loads(s): def loads(s, _dict=dict):
"""Returns a dictionary containing s, a string, parsed as toml.""" """Returns a dictionary containing s, a string, parsed as toml."""
implicitgroups = [] implicitgroups = []
retval = {} retval = _dict()
currentlevel = retval currentlevel = retval
if isinstance(s, basestring): if isinstance(s, basestring):
try: try:
@ -42,36 +55,93 @@ def loads(s):
sl = list(s) sl = list(s)
openarr = 0 openarr = 0
openstring = False openstring = False
openstrchar = ""
multilinestr = False
arrayoftables = False arrayoftables = False
beginline = True beginline = True
keygroup = False keygroup = False
keyname = 0
delnum = 1 delnum = 1
for i in range(len(sl)): for i in range(len(sl)):
if sl[i] == '"': if sl[i] == '\r' and sl[i+1] == '\n':
oddbackslash = False sl[i] = ' '
try: continue
if keyname:
if sl[i] == '\n':
raise Exception("Key name found without value. Reached end of line.")
if openstring:
if sl[i] == openstrchar:
keyname = 2
openstring = False
openstrchar = ""
continue
elif keyname == 1:
if sl[i].isspace():
keyname = 2
continue
elif sl[i].isalnum() or sl[i] == '_' or sl[i] == '-':
continue
elif keyname == 2 and sl[i].isspace():
continue
if sl[i] == '=':
keyname = 0
else:
raise Exception("Found invalid character in key name: '"+sl[i]+"'. Try quoting the key name.")
if sl[i] == "'" and openstrchar != '"':
k = 1 k = 1
j = sl[i-k] try:
while sl[i-k] == "'":
k += 1
if k == 3:
break
except IndexError:
pass
if k == 3:
multilinestr = not multilinestr
openstring = multilinestr
else:
openstring = not openstring
if openstring:
openstrchar = "'"
else:
openstrchar = ""
if sl[i] == '"' and openstrchar != "'":
oddbackslash = False oddbackslash = False
while j == '\\': k = 1
tripquote = False
try:
while sl[i-k] == '"':
k += 1
if k == 3:
tripquote = True
break
while sl[i-k] == '\\':
oddbackslash = not oddbackslash oddbackslash = not oddbackslash
k += 1 k += 1
j = sl[i-k]
except IndexError: except IndexError:
pass pass
if not oddbackslash: if not oddbackslash:
if tripquote:
multilinestr = not multilinestr
openstring = multilinestr
else:
openstring = not openstring openstring = not openstring
if keygroup and (sl[i] == ' ' or sl[i] == '\t'): if openstring:
keygroup = False openstrchar = '"'
if arrayoftables and (sl[i] == ' ' or sl[i] == '\t'): else:
arrayoftables = False openstrchar = ""
if sl[i] == '#' and not openstring and not keygroup and not arrayoftables: if sl[i] == '#' and not openstring and not keygroup and \
not arrayoftables:
j = i j = i
try:
while sl[j] != '\n': while sl[j] != '\n':
sl.insert(j, ' ') sl.insert(j, ' ')
sl.pop(j+1) sl.pop(j+1)
j += 1 j += 1
if sl[i] == '[' and not openstring and not keygroup and not arrayoftables: except IndexError:
break
if sl[i] == '[' and not openstring and not keygroup and \
not arrayoftables:
if beginline: if beginline:
if sl[i+1] == '[': if sl[i+1] == '[':
arrayoftables = True arrayoftables = True
@ -88,22 +158,58 @@ def loads(s):
else: else:
openarr -= 1 openarr -= 1
if sl[i] == '\n': if sl[i] == '\n':
if openstring: if openstring or multilinestr:
if not multilinestr:
raise Exception("Unbalanced quotes") raise Exception("Unbalanced quotes")
if openarr: if sl[i-1] == "'" or sl[i-1] == '"':
sl.insert(i, sl[i-1])
sl.pop(i+1)
sl[i-3] = ' '
elif openarr:
sl.insert(i, ' ') sl.insert(i, ' ')
sl.pop(i+1) sl.pop(i+1)
else: else:
beginline = True beginline = True
elif beginline and sl[i] != ' ' and sl[i] != '\t': elif beginline and sl[i] != ' ' and sl[i] != '\t':
beginline = False beginline = False
keygroup = True if not keygroup and not arrayoftables:
if sl[i] == '=':
raise Exception("Found empty keyname. ")
keyname = 1
s = ''.join(sl) s = ''.join(sl)
s = s.split('\n') s = s.split('\n')
else: else:
raise Exception("What exactly are you trying to pull?") raise Exception("What exactly are you trying to pull?")
multikey = None
multilinestr = ""
multibackslash = False
for line in s: for line in s:
line = line.strip() line = line.strip()
if multikey:
if multibackslash:
strippedline = line.lstrip(' \t\n')
if strippedline == '':
continue
multilinestr += strippedline
else:
multilinestr += line
multibackslash = False
if len(line) > 2 and line[-1] == multilinestr[0] and \
line[-2] == multilinestr[0] and line[-3] == multilinestr[0]:
value, vtype = load_value(multilinestr)
currentlevel[multikey] = value
multikey = None
multilinestr = ""
else:
k = len(multilinestr) -1
while k > -1 and multilinestr[k] == '\\':
multibackslash = not multibackslash
k -= 1
if multibackslash:
multilinestr = multilinestr[:-1]
else:
multilinestr += "\n"
continue
if line == "": if line == "":
continue continue
if line[0] == '[': if line[0] == '[':
@ -115,12 +221,25 @@ def loads(s):
line = line[1:].split(']', 1) line = line[1:].split(']', 1)
if line[1].strip() != "": if line[1].strip() != "":
raise Exception("Key group not on a line by itself.") raise Exception("Key group not on a line by itself.")
line = line[0] groups = line[0].split('.')
if '[' in line: i = 0
raise Exception("Key group name cannot contain '['") while i < len(groups):
if ']' in line: groups[i] = groups[i].strip()
raise Exception("Key group name cannot contain']'") if groups[i][0] == '"' or groups[i][0] == "'":
groups = line.split('.') groupstr = groups[i]
j = i+1
while not groupstr[0] == groupstr[-1]:
j += 1
groupstr = '.'.join(groups[i:j])
groups[i] = groupstr[1:-1]
j -= 1
while j > i:
groups.pop(j)
j -= 1
else:
if not re.match(r'^[A-Za-z0-9_-]+$', groups[i]):
raise Exception("Invalid group name '"+groups[i]+"'. Try quoting it.")
i += 1
currentlevel = retval currentlevel = retval
for i in range(len(groups)): for i in range(len(groups)):
group = groups[i] group = groups[i]
@ -134,23 +253,25 @@ def loads(s):
if arrayoftables: if arrayoftables:
raise Exception("An implicitly defined table can't be an array") raise Exception("An implicitly defined table can't be an array")
elif arrayoftables: elif arrayoftables:
currentlevel[group].append({}) currentlevel[group].append(_dict())
else: else:
raise Exception("What? "+group+" already exists?"+str(currentlevel)) raise Exception("What? "+group+" already exists?"+str(currentlevel))
except TypeError: except TypeError:
if i != len(groups) - 1: if i != len(groups) - 1:
implicitgroups.append(group) implicitgroups.append(group)
currentlevel = currentlevel[0] currentlevel = currentlevel[-1]
if arrayoftables: try:
currentlevel[group] = [{}] currentlevel[group]
else: except KeyError:
currentlevel[group] = {} currentlevel[group] = _dict()
if i == len(groups) - 1 and arrayoftables:
currentlevel[group] = [_dict()]
except KeyError: except KeyError:
if i != len(groups) - 1: if i != len(groups) - 1:
implicitgroups.append(group) implicitgroups.append(group)
currentlevel[group] = {} currentlevel[group] = _dict()
if i == len(groups) - 1 and arrayoftables: if i == len(groups) - 1 and arrayoftables:
currentlevel[group] = [{}] currentlevel[group] = [_dict()]
currentlevel = currentlevel[group] currentlevel = currentlevel[group]
if arrayoftables: if arrayoftables:
try: try:
@ -160,32 +281,112 @@ def loads(s):
elif "=" in line: elif "=" in line:
i = 1 i = 1
pair = line.split('=', i) pair = line.split('=', i)
if re.match(r'^[0-9]', pair[-1]):
pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1])
l = len(line) l = len(line)
while pair[-1][0] != ' ' and pair[-1][0] != '\t' and pair[-1][0] != '"' and pair[-1][0] != '[' and pair[-1] != 'true' and pair[-1] != 'false': while pair[-1][0] != ' ' and pair[-1][0] != '\t' and \
pair[-1][0] != "'" and pair[-1][0] != '"' and \
pair[-1][0] != '[' and pair[-1] != 'true' and \
pair[-1] != 'false':
try: try:
float(pair[-1]) float(pair[-1])
break break
except ValueError: except ValueError:
try: pass
datetime.datetime.strptime(pair[-1], "%Y-%m-%dT%H:%M:%SZ") if load_date(pair[-1]) != None:
break break
except ValueError:
i += 1 i += 1
prev_val = pair[-1]
pair = line.split('=', i) pair = line.split('=', i)
if re.match(r'^[0-9]', pair[-1]):
pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1])
if prev_val == pair[-1]:
raise Exception("Invalid date or number")
newpair = [] newpair = []
newpair.append('='.join(pair[:-1])) newpair.append('='.join(pair[:-1]))
newpair.append(pair[-1]) newpair.append(pair[-1])
pair = newpair pair = newpair
pair[0] = pair[0].strip() pair[0] = pair[0].strip()
if (pair[0][0] == '"' or pair[0][0] == "'") and \
(pair[0][-1] == '"' or pair[0][-1] == "'"):
pair[0] = pair[0][1:-1]
pair[1] = pair[1].strip() pair[1] = pair[1].strip()
if len(pair[1]) > 2 and (pair[1][0] == '"' or pair[1][0] == "'") \
and pair[1][1] == pair[1][0] and pair[1][2] == pair[1][0] \
and not (len(pair[1]) > 5 and pair[1][-1] == pair[1][0] \
and pair[1][-2] == pair[1][0] and \
pair[1][-3] == pair[1][0]):
k = len(pair[1]) -1
while k > -1 and pair[1][k] == '\\':
multibackslash = not multibackslash
k -= 1
if multibackslash:
multilinestr = pair[1][:-1]
else:
multilinestr = pair[1] + "\n"
multikey = pair[0]
else:
value, vtype = load_value(pair[1]) value, vtype = load_value(pair[1])
try: try:
currentlevel[pair[0]] currentlevel[pair[0]]
raise Exception("Duplicate keys!") raise Exception("Duplicate keys!")
except KeyError: except KeyError:
if multikey:
continue
else:
currentlevel[pair[0]] = value currentlevel[pair[0]] = value
return retval return retval
def load_date(val):
microsecond = 0
tz = None
if len(val) > 19 and val[19] == '.':
microsecond = int(val[20:26])
if len(val) > 26:
tz = TomlTz(val[26:31])
elif len(val) > 20:
tz = TomlTz(val[19:24])
try:
d = datetime.datetime(int(val[:4]), int(val[5:7]), int(val[8:10]), int(val[11:13]), int(val[14:16]), int(val[17:19]), microsecond, tz)
except ValueError:
return None
return d
def load_unicode_escapes(v, hexbytes, prefix):
    """Rebuild a string that was split on a unicode escape prefix.

    v -- the text before the first occurrence of the prefix.
    hexbytes -- the remaining pieces, each starting right after a prefix.
    prefix -- "\\u" (4 hex digits follow) or "\\U" (8 hex digits follow).

    Each piece normally starts with the hex digits of a code point, which
    are replaced by the corresponding character. When the prefix itself was
    preceded by an odd number of backslashes it was escaped, so the prefix
    and piece are put back unchanged.

    Returns the rebuilt string. Raises Exception("Invalid escape sequence")
    when an unescaped prefix is not followed by enough hex digits.
    """
    def escapes_next(text):
        # True when text ends in an odd number of backslashes, which means
        # the backslash of the following prefix is itself escaped.
        return (len(text) - len(text.rstrip('\\'))) % 2 == 1

    hxblen = 8 if prefix == "\\U" else 4
    skip = escapes_next(v)
    for hx in hexbytes:
        if skip:
            # Escaped prefix: restore the text exactly as it was written.
            v += prefix
            v += hx
        else:
            hxb = hx[:hxblen].lower()
            if len(hxb) != hxblen or hxb.strip('0123456789abcdef'):
                raise Exception("Invalid escape sequence")
            v += unichr(int(hxb, 16))
            v += unicode(hx[hxblen:])
        # Recount after every piece, decoded ones included; otherwise an
        # escaped backslash after a decoded escape (as in \u0041\\u0042)
        # lets the next, escaped sequence be decoded too.
        skip = escapes_next(hx)
    return v
def load_value(v): def load_value(v):
if v == 'true': if v == 'true':
return (True, "bool") return (True, "bool")
@ -193,6 +394,8 @@ def load_value(v):
return (False, "bool") return (False, "bool")
elif v[0] == '"': elif v[0] == '"':
testv = v[1:].split('"') testv = v[1:].split('"')
if testv[0] == '' and testv[1] == '':
testv = testv[2:-2]
closed = False closed = False
for tv in testv: for tv in testv:
if tv == '': if tv == '':
@ -213,56 +416,41 @@ def load_value(v):
raise Exception("Stuff after closed string. WTF?") raise Exception("Stuff after closed string. WTF?")
else: else:
closed = True closed = True
escapes = ['0', 'b', 'f', '/', 'n', 'r', 't', '"', '\\'] escapes = ['0', 'b', 'f', 'n', 'r', 't', '"', '\\']
escapedchars = ['\0', '\b', '\f', '/', '\n', '\r', '\t', '\"', '\\'] escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"', '\\']
escapeseqs = v.split('\\')[1:] escapeseqs = v.split('\\')[1:]
backslash = False backslash = False
for i in escapeseqs: for i in escapeseqs:
if i == '': if i == '':
backslash = not backslash backslash = not backslash
else: else:
if i[0] not in escapes and i[0] != 'u' and not backslash: if i[0] not in escapes and i[0] != 'u' and i[0] != 'U' and \
not backslash:
raise Exception("Reserved escape sequence used") raise Exception("Reserved escape sequence used")
if backslash: if backslash:
backslash = False backslash = False
if "\\u" in v: for prefix in ["\\u", "\\U"]:
hexchars = ['0', '1', '2', '3', '4', '5', '6', '7', if prefix in v:
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'] hexbytes = v.split(prefix)
hexbytes = v.split('\\u') v = load_unicode_escapes(hexbytes[0], hexbytes[1:], prefix)
newv = hexbytes[0]
hexbytes = hexbytes[1:]
for hx in hexbytes:
hxb = ""
try:
if hx[0].lower() in hexchars:
hxb += hx[0].lower()
if hx[1].lower() in hexchars:
hxb += hx[1].lower()
if hx[2].lower() in hexchars:
hxb += hx[2].lower()
if hx[3].lower() in hexchars:
hxb += hx[3].lower()
except IndexError:
if len(hxb) != 2:
raise Exception("Invalid escape sequence")
if len(hxb) != 4 and len(hxb) != 2:
raise Exception("Invalid escape sequence")
newv += unichr(int(hxb, 16))
newv += unicode(hx[len(hxb):])
v = newv
for i in range(len(escapes)): for i in range(len(escapes)):
if escapes[i] == '\\': if escapes[i] == '\\':
v = v.replace("\\"+escapes[i], escapedchars[i]) v = v.replace("\\"+escapes[i], escapedchars[i])
else: else:
v = re.sub("([^\\\\](\\\\\\\\)*)\\\\"+escapes[i], "\\1"+escapedchars[i], v) v = re.sub("([^\\\\](\\\\\\\\)*)\\\\"+escapes[i], "\\1"+escapedchars[i], v)
if v[1] == '"':
v = v[2:-2]
return (v[1:-1], "str")
elif v[0] == "'":
if v[1] == "'":
v = v[2:-2]
return (v[1:-1], "str") return (v[1:-1], "str")
elif v[0] == '[': elif v[0] == '[':
return (load_array(v), "array") return (load_array(v), "array")
elif len(v) == 20 and v[-1] == 'Z':
if v[10] == 'T':
return (datetime.datetime.strptime(v, "%Y-%m-%dT%H:%M:%SZ"), "date")
else: else:
raise Exception("Wait, what?") parsed_date = load_date(v)
if parsed_date != None:
return (parsed_date, "date")
else: else:
itype = "int" itype = "int"
digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
@ -270,7 +458,7 @@ def load_value(v):
if v[0] == '-': if v[0] == '-':
neg = True neg = True
v = v[1:] v = v[1:]
if '.' in v: if '.' in v or 'e' in v:
if v.split('.', 1)[1] == '': if v.split('.', 1)[1] == '':
raise Exception("This float is missing digits after the point") raise Exception("This float is missing digits after the point")
if v[0] not in digits: if v[0] not in digits:
@ -283,7 +471,6 @@ def load_value(v):
return (0 - v, itype) return (0 - v, itype)
return (v, itype) return (v, itype)
def load_array(a): def load_array(a):
atype = None atype = None
retval = [] retval = []
@ -363,6 +550,12 @@ def dump_sections(o, sup):
retdict = {} retdict = {}
arraystr = "" arraystr = ""
for section in o: for section in o:
qsection = section
if not re.match(r'^[A-Za-z0-9_-]+$', section):
if '"' in section:
qsection = "'" + section + "'"
else:
qsection = '"' + section + '"'
if not isinstance(o[section], dict): if not isinstance(o[section], dict):
arrayoftables = False arrayoftables = False
if isinstance(o[section], list): if isinstance(o[section], list):
@ -372,8 +565,8 @@ def dump_sections(o, sup):
if arrayoftables: if arrayoftables:
for a in o[section]: for a in o[section]:
arraytabstr = "" arraytabstr = ""
arraystr += "[["+sup+section+"]]\n" arraystr += "[["+sup+qsection+"]]\n"
s, d = dump_sections(a, sup+section) s, d = dump_sections(a, sup+qsection)
if s: if s:
if s[0] == "[": if s[0] == "[":
arraytabstr += s arraytabstr += s
@ -382,18 +575,20 @@ def dump_sections(o, sup):
while d != {}: while d != {}:
newd = {} newd = {}
for dsec in d: for dsec in d:
s1, d1 = dump_sections(d[dsec], sup+section+dsec) s1, d1 = dump_sections(d[dsec], sup+qsection+"."+dsec)
if s1: if s1:
arraytabstr += "["+sup+section+"."+dsec+"]\n" arraytabstr += "["+sup+qsection+"."+dsec+"]\n"
arraytabstr += s1 arraytabstr += s1
for s1 in d1: for s1 in d1:
newd[dsec+"."+s1] = d1[s1] newd[dsec+"."+s1] = d1[s1]
d = newd d = newd
arraystr += arraytabstr arraystr += arraytabstr
else: else:
retstr += section + " = " + str(dump_value(o[section])) + '\n' if o[section] is not None:
retstr += (qsection + " = " +
str(dump_value(o[section])) + '\n')
else: else:
retdict[section] = o[section] retdict[qsection] = o[section]
retstr += arraystr retstr += arraystr
return (retstr, retdict) return (retstr, retdict)
@ -415,17 +610,22 @@ def dump_value(v):
retval += "]" retval += "]"
return retval return retval
if isinstance(v, (str, unicode)): if isinstance(v, (str, unicode)):
escapes = ['\\', '0', 'b', 'f', '/', 'n', 'r', 't', '"'] v = "%r" % v
escapedchars = ['\\', '\0', '\b', '\f', '/', '\n', '\r', '\t', '\"'] if v[0] == 'u':
for i in range(len(escapes)): v = v[1:]
v = v.replace(escapedchars[i], "\\"+escapes[i]) singlequote = v[0] == "'"
v = v[1:-1]
if singlequote:
v = v.replace("\\'", "'")
v = v.replace('"', '\\"')
v = v.replace("\\x", "\\u00")
return str('"'+v+'"') return str('"'+v+'"')
if isinstance(v, bool): if isinstance(v, bool):
return str(v).lower() return str(v).lower()
if isinstance(v, datetime.datetime): if isinstance(v, datetime.datetime):
return v.isoformat()[:19]+'Z' return v.isoformat()[:19]+'Z'
if isinstance(v, float): if isinstance(v, float):
return '{0:f}'.format(decimal.Decimal(str(v))) return str(v)
return v return v
def toml_merge_dict(a, b): def toml_merge_dict(a, b):