Fix #6623 and update toml.py

2025-09-22 12:50:08 +01:00 · 2015-07-15 22:43:08 +03:00 · 2015-07-15 22:43:08 +03:00 · 75e14e80cd
commit 75e14e80cd
parent 3a5e4335d7
2 changed files with 331 additions and 119 deletions
--- a/python/tidy.py
+++ b/python/tidy.py
@ -12,9 +12,10 @@ import fnmatch
 import itertools
 import re
 import sys
 import toml
 from licenseck import licenses
-filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py"]
+filetypes_to_check = [".rs", ".rc", ".cpp", ".c", ".h", ".py", ".toml"]
 reftest_directories = ["tests/ref"]
 reftest_filetype = ".list"
 python_dependencies = [
@ -145,10 +146,21 @@ def check_flake8(file_paths):
    return num_errors
 def check_toml(contents):
    """Yield a ``(line_number, message)`` tuple for each line containing ``*``.

    ``contents`` is the whole text of a file as a single string. Line
    numbers in the yielded tuples are 1-based.
    """
    message = "found asterisk instead of minimum version number"
    for line_number, text in enumerate(contents.splitlines(True), 1):
        if "*" in text:
            yield (line_number, message)
 def collect_errors_for_files(files_to_check, checking_functions):
    for file_name in files_to_check:
        with open(file_name, "r") as fp:
            contents = fp.read()
            if file_name.endswith(".toml"):
                for error in check_toml(contents):
                    yield (file_name, error[0], error[1])
            else:
                for check in checking_functions:
                    for error in check(contents):
                        # filename, line, message
--- a/python/toml/toml.py
+++ b/python/toml/toml.py
@ -1,5 +1,18 @@
 import datetime, decimal, re
 class TomlTz(datetime.tzinfo):
    """Fixed-offset ``tzinfo`` built from an offset string such as ``+02:00``.

    The string is read positionally: characters 0-2 are the signed hours
    and characters 4-5 are the minutes. Non-numeric text in those positions
    makes ``int()`` raise ``ValueError``.
    """

    def __init__(self, toml_offset):
        # Must be __init__, not __new__: a __new__ that returns nothing makes
        # ``TomlTz(...)`` evaluate to None and stores the values on the class.
        self._raw_offset = toml_offset
        # Keep the sign separately so it also applies to the minutes;
        # ``int("-00")`` alone would lose it for offsets such as "-00:30".
        self._sign = -1 if toml_offset[:1] == '-' else 1
        self._hours = abs(int(toml_offset[:3]))
        self._minutes = int(toml_offset[4:6])

    def __getinitargs__(self):
        # Supplies the constructor argument so copy/pickle can rebuild the
        # instance through __init__, which requires the offset string.
        return (self._raw_offset,)

    def tzname(self, dt):
        """Return the zone name: ``"UTC"`` followed by the raw offset."""
        return "UTC"+self._raw_offset

    def utcoffset(self, dt):
        """Return the signed offset from UTC as a ``timedelta``."""
        return self._sign * datetime.timedelta(hours=self._hours,
                                               minutes=self._minutes)

    def dst(self, dt):
        """Return a zero ``timedelta``; a fixed offset carries no DST part."""
        return datetime.timedelta(0)
 try:
    _range = xrange
 except NameError:
@ -8,31 +21,31 @@ except NameError:
    basestring = str
    unichr = chr
-def load(f):
+def load(f, _dict=dict):
    """Returns a dictionary containing the named file parsed as toml."""
    if isinstance(f, basestring):
        with open(f) as ffile:
-            return loads(ffile.read())
+            return loads(ffile.read(), _dict)
    elif isinstance(f, list):
        for l in f:
            if not isinstance(l, basestring):
                raise Exception("Load expects a list to contain filenames only")
-        d = []
+        d = _dict()
        for l in f:
            d.append(load(l))
-        r = {}
+        r = _dict()
        for l in d:
            toml_merge_dict(r, l)
        return r
    elif f.read:
-        return loads(f.read())
+        return loads(f.read(), _dict)
    else:
        raise Exception("You can only load a file descriptor, filename or list")
-def loads(s):
+def loads(s, _dict=dict):
    """Returns a dictionary containing s, a string, parsed as toml."""
    implicitgroups = []
-    retval = {}
+    retval = _dict()
    currentlevel = retval
    if isinstance(s, basestring):
        try:
@ -42,36 +55,93 @@ def loads(s):
        sl = list(s)
        openarr = 0
        openstring = False
        openstrchar = ""
        multilinestr = False
        arrayoftables = False
        beginline = True
        keygroup = False
        keyname = 0
        delnum = 1
        for i in range(len(sl)):
-            if sl[i] == '"':
+            if sl[i] == '\r' and sl[i+1] == '\n':
-                oddbackslash = False
+                sl[i] = ' '
-                try:
+                continue
            if keyname:
                if sl[i] == '\n':
                    raise Exception("Key name found without value. Reached end of line.")
                if openstring:
                    if sl[i] == openstrchar:
                        keyname = 2
                        openstring = False
                        openstrchar = ""
                    continue
                elif keyname == 1:
                    if sl[i].isspace():
                        keyname = 2
                        continue
                    elif sl[i].isalnum() or sl[i] == '_' or sl[i] == '-':
                        continue
                elif keyname == 2 and sl[i].isspace():
                    continue
                if sl[i] == '=':
                    keyname = 0
                else:
                    raise Exception("Found invalid character in key name: '"+sl[i]+"'. Try quoting the key name.")
            if sl[i] == "'" and openstrchar != '"':
                k = 1
-                    j = sl[i-k]
+                try:
                    while sl[i-k] == "'":
                        k += 1
                        if k == 3:
                            break
                except IndexError:
                    pass
                if k == 3:
                    multilinestr = not multilinestr
                    openstring = multilinestr
                else:
                    openstring = not openstring
                if openstring:
                    openstrchar = "'"
                else:
                    openstrchar = ""
            if sl[i] == '"' and openstrchar != "'":
                oddbackslash = False
-                    while j == '\\':
+                k = 1
                tripquote = False
                try:
                    while sl[i-k] == '"':
                        k += 1
                        if k == 3:
                            tripquote = True
                            break
                    while sl[i-k] == '\\':
                        oddbackslash = not oddbackslash
                        k += 1
                        j = sl[i-k]
                except IndexError:
                    pass
                if not oddbackslash:
                    if tripquote:
                        multilinestr = not multilinestr
                        openstring = multilinestr
                    else:
                        openstring = not openstring
-            if keygroup and (sl[i] == ' ' or sl[i] == '\t'):
+                if openstring:
-                keygroup = False
+                    openstrchar = '"'
-            if arrayoftables and (sl[i] == ' ' or sl[i] == '\t'):
+                else:
-                arrayoftables = False
+                    openstrchar = ""
-            if sl[i] == '#' and not openstring and not keygroup and not arrayoftables:
+            if sl[i] == '#' and not openstring and not keygroup and \
                    not arrayoftables:
                j = i
                try:
                    while sl[j] != '\n':
                        sl.insert(j, ' ')
                        sl.pop(j+1)
                        j += 1
-            if sl[i] == '[' and not openstring and not keygroup and not arrayoftables:
+                except IndexError:
                    break
            if sl[i] == '[' and not openstring and not keygroup and \
                    not arrayoftables:
                if beginline:
                    if sl[i+1] == '[':
                        arrayoftables = True
@ -88,22 +158,58 @@ def loads(s):
                else:
                    openarr -= 1
            if sl[i] == '\n':
-                if openstring:
+                if openstring or multilinestr:
                    if not multilinestr:
                        raise Exception("Unbalanced quotes")
-                if openarr:
+                    if sl[i-1] == "'" or sl[i-1] == '"':
                        sl.insert(i, sl[i-1])
                        sl.pop(i+1)
                        sl[i-3] = ' '
                elif openarr:
                    sl.insert(i, ' ')
                    sl.pop(i+1)
                else:
                    beginline = True
            elif beginline and sl[i] != ' ' and sl[i] != '\t':
                beginline = False
-                keygroup = True
+                if not keygroup and not arrayoftables:
                    if sl[i] == '=':
                        raise Exception("Found empty keyname. ")
                    keyname = 1
        s = ''.join(sl)
        s = s.split('\n')
    else:
        raise Exception("What exactly are you trying to pull?")
    multikey = None
    multilinestr = ""
    multibackslash = False
    for line in s:
        line = line.strip()
        if multikey:
            if multibackslash:
                strippedline = line.lstrip(' \t\n')
                if strippedline == '':
                    continue
                multilinestr += strippedline
            else:
                multilinestr += line
            multibackslash = False
            if len(line) > 2 and line[-1] == multilinestr[0] and \
                    line[-2] == multilinestr[0] and line[-3] == multilinestr[0]:
                value, vtype = load_value(multilinestr)
                currentlevel[multikey] = value
                multikey = None
                multilinestr = ""
            else:
                k = len(multilinestr) -1
                while k > -1 and multilinestr[k] == '\\':
                    multibackslash = not multibackslash
                    k -= 1
                if multibackslash:
                    multilinestr = multilinestr[:-1]
                else:
                    multilinestr += "\n"
            continue
        if line == "":
            continue
        if line[0] == '[':
@ -115,12 +221,25 @@ def loads(s):
                line = line[1:].split(']', 1)
            if line[1].strip() != "":
                raise Exception("Key group not on a line by itself.")
-            line = line[0]
+            groups = line[0].split('.')
-            if '[' in line:
+            i = 0
-                raise Exception("Key group name cannot contain '['")
+            while i < len(groups):
-            if ']' in line:
+                groups[i] = groups[i].strip()
-                raise Exception("Key group name cannot contain']'")
+                if groups[i][0] == '"' or groups[i][0] == "'":
-            groups = line.split('.')
+                    groupstr = groups[i]
                    j = i+1
                    while not groupstr[0] == groupstr[-1]:
                        j += 1
                        groupstr = '.'.join(groups[i:j])
                    groups[i] = groupstr[1:-1]
                    j -= 1
                    while j > i:
                        groups.pop(j)
                        j -= 1
                else:
                    if not re.match(r'^[A-Za-z0-9_-]+$', groups[i]):
                        raise Exception("Invalid group name '"+groups[i]+"'. Try quoting it.")
                i += 1
            currentlevel = retval
            for i in range(len(groups)):
                group = groups[i]
@ -134,23 +253,25 @@ def loads(s):
                            if arrayoftables:
                                raise Exception("An implicitly defined table can't be an array")
                        elif arrayoftables:
-                            currentlevel[group].append({})
+                            currentlevel[group].append(_dict())
                        else:
                            raise Exception("What? "+group+" already exists?"+str(currentlevel))
                except TypeError:
                    if i != len(groups) - 1:
                        implicitgroups.append(group)
-                    currentlevel = currentlevel[0]
+                    currentlevel = currentlevel[-1]
-                    if arrayoftables:
+                    try:
-                        currentlevel[group] = [{}]
+                        currentlevel[group]
-                    else:
+                    except KeyError:
-                        currentlevel[group] = {}
+                        currentlevel[group] = _dict()
                        if i == len(groups) - 1 and arrayoftables:
                            currentlevel[group] = [_dict()]
                except KeyError:
                    if i != len(groups) - 1:
                        implicitgroups.append(group)
-                    currentlevel[group] = {}
+                    currentlevel[group] = _dict()
                    if i == len(groups) - 1 and arrayoftables:
-                        currentlevel[group] = [{}]
+                        currentlevel[group] = [_dict()]
                currentlevel = currentlevel[group]
                if arrayoftables:
                    try:
@ -160,32 +281,112 @@ def loads(s):
        elif "=" in line:
            i = 1
            pair = line.split('=', i)
            if re.match(r'^[0-9]', pair[-1]):
                pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1])
            l = len(line)
-            while pair[-1][0] != ' ' and pair[-1][0] != '\t' and pair[-1][0] != '"' and pair[-1][0] != '[' and pair[-1] != 'true' and pair[-1] != 'false':
+            while pair[-1][0] != ' ' and pair[-1][0] != '\t' and \
                    pair[-1][0] != "'" and pair[-1][0] != '"' and \
                    pair[-1][0] != '[' and pair[-1] != 'true' and \
                    pair[-1] != 'false':
                try:
                    float(pair[-1])
                    break
                except ValueError:
-                    try:
+                    pass
-                        datetime.datetime.strptime(pair[-1], "%Y-%m-%dT%H:%M:%SZ")
+                if load_date(pair[-1]) != None:
                    break
                    except ValueError:
                i += 1
                prev_val = pair[-1]
                pair = line.split('=', i)
                if re.match(r'^[0-9]', pair[-1]):
                    pair[-1] = re.sub(r'([0-9])_(?=[0-9])', r'\1', pair[-1])
                if prev_val == pair[-1]:
                    raise Exception("Invalid date or number")
            newpair = []
            newpair.append('='.join(pair[:-1]))
            newpair.append(pair[-1])
            pair = newpair
            pair[0] = pair[0].strip()
            if (pair[0][0] == '"' or pair[0][0] == "'") and \
                    (pair[0][-1] == '"' or pair[0][-1] == "'"):
                pair[0] = pair[0][1:-1]
            pair[1] = pair[1].strip()
            if len(pair[1]) > 2 and (pair[1][0] == '"' or pair[1][0] == "'") \
                    and pair[1][1] == pair[1][0] and pair[1][2] == pair[1][0] \
                    and not (len(pair[1]) > 5 and pair[1][-1] == pair[1][0] \
                                 and pair[1][-2] == pair[1][0] and \
                                 pair[1][-3] == pair[1][0]):
                k = len(pair[1]) -1
                while k > -1 and pair[1][k] == '\\':
                    multibackslash = not multibackslash
                    k -= 1
                if multibackslash:
                    multilinestr = pair[1][:-1]
                else:
                    multilinestr = pair[1] + "\n"
                multikey = pair[0]
            else:
                value, vtype = load_value(pair[1])
            try:
                currentlevel[pair[0]]
                raise Exception("Duplicate keys!")
            except KeyError:
                if multikey:
                    continue
                else:
                    currentlevel[pair[0]] = value
    return retval
 def load_date(val):
    """Parse ``val`` as a ``YYYY-MM-DDTHH:MM:SS`` timestamp.

    Fields are read from fixed positions in the string; the separator
    characters between them are not checked. If character 19 is ``.``,
    characters 20-25 are read as the microsecond value and anything after
    that (up to five characters) as a UTC offset. Otherwise, a string
    longer than 20 characters has characters 19-23 read as the offset.

    Returns a ``datetime.datetime``, or ``None`` when any field cannot be
    converted or is out of range.
    """
    microsecond = 0
    tz = None
    try:
        # The fraction and offset conversions live inside the try as well,
        # so malformed text in those positions also gives None instead of
        # letting a ValueError escape to the caller.
        if len(val) > 19 and val[19] == '.':
            microsecond = int(val[20:26])
            if len(val) > 26:
                tz = TomlTz(val[26:31])
        elif len(val) > 20:
            tz = TomlTz(val[19:24])
        return datetime.datetime(
            int(val[:4]), int(val[5:7]), int(val[8:10]),
            int(val[11:13]), int(val[14:16]), int(val[17:19]),
            microsecond, tz)
    except ValueError:
        return None
 def load_unicode_escapes(v, hexbytes, prefix):
    """Append the pieces in ``hexbytes`` to ``v``, decoding unicode escapes.

    ``hexbytes`` holds the text that followed each occurrence of ``prefix``
    (``"\\u"`` or ``"\\U"``) in a string; ``v`` is the text before the first
    occurrence. A piece is decoded from its leading 4 hex digits (8 for
    ``"\\U"``) unless the text before it ended in an odd number of
    backslashes, in which case the prefix and the piece are appended
    unchanged.

    Raises ``Exception("Invalid escape sequence")`` when a piece to be
    decoded does not start with enough hex digits.
    """
    hexdigits = ('0', '1', '2', '3', '4', '5', '6', '7',
                 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f')
    width = 8 if prefix == "\\U" else 4
    # An odd run of trailing backslashes means the prefix was itself escaped.
    escaped = (len(v) - len(v.rstrip('\\'))) % 2 == 1
    for chunk in hexbytes:
        if escaped:
            # Re-evaluate for the next piece from this piece's own tail.
            escaped = (len(chunk) - len(chunk.rstrip('\\'))) % 2 == 1
            v += prefix
            v += chunk
            continue
        digits = ""
        for ch in chunk[:width]:
            lowered = ch.lower()
            if lowered not in hexdigits:
                raise Exception("Invalid escape sequence")
            digits += lowered
        if len(digits) < width:
            raise Exception("Invalid escape sequence")
        v += unichr(int(digits, 16))
        v += unicode(chunk[len(digits):])
    return v
 def load_value(v):
    if v == 'true':
        return (True, "bool")
@ -193,6 +394,8 @@ def load_value(v):
        return (False, "bool")
    elif v[0] == '"':
        testv = v[1:].split('"')
        if testv[0] == '' and testv[1] == '':
            testv = testv[2:-2]
        closed = False
        for tv in testv:
            if tv == '':
@ -213,56 +416,41 @@ def load_value(v):
                        raise Exception("Stuff after closed string. WTF?")
                    else:
                        closed = True
-        escapes = ['0', 'b', 'f', '/', 'n', 'r', 't', '"', '\\']
+        escapes = ['0', 'b', 'f', 'n', 'r', 't', '"', '\\']
-        escapedchars = ['\0', '\b', '\f', '/', '\n', '\r', '\t', '\"', '\\']
+        escapedchars = ['\0', '\b', '\f', '\n', '\r', '\t', '\"', '\\']
        escapeseqs = v.split('\\')[1:]
        backslash = False
        for i in escapeseqs:
            if i == '':
                backslash = not backslash
            else:
-                if i[0] not in escapes and i[0] != 'u' and not backslash:
+                if i[0] not in escapes and i[0] != 'u' and i[0] != 'U' and \
                        not backslash:
                    raise Exception("Reserved escape sequence used")
                if backslash:
                    backslash = False
-        if "\\u" in v:
+        for prefix in ["\\u", "\\U"]:
-            hexchars = ['0', '1', '2', '3', '4', '5', '6', '7',
+            if prefix in v:
-                        '8', '9', 'a', 'b', 'c', 'd', 'e', 'f']
+                hexbytes = v.split(prefix)
-            hexbytes = v.split('\\u')
+                v = load_unicode_escapes(hexbytes[0], hexbytes[1:], prefix)
            newv = hexbytes[0]
            hexbytes = hexbytes[1:]
            for hx in hexbytes:
                hxb = ""
                try:
                    if hx[0].lower() in hexchars:
                        hxb += hx[0].lower()
                        if hx[1].lower() in hexchars:
                            hxb += hx[1].lower()
                        if hx[2].lower() in hexchars:
                            hxb += hx[2].lower()
                            if hx[3].lower() in hexchars:
                                hxb += hx[3].lower()
                except IndexError:
                    if len(hxb) != 2:
                        raise Exception("Invalid escape sequence")
                if len(hxb) != 4 and len(hxb) != 2:
                    raise Exception("Invalid escape sequence")
                newv += unichr(int(hxb, 16))
                newv += unicode(hx[len(hxb):])
            v = newv
        for i in range(len(escapes)):
            if escapes[i] == '\\':
                v = v.replace("\\"+escapes[i], escapedchars[i])
            else:
                v = re.sub("([^\\\\](\\\\\\\\)*)\\\\"+escapes[i], "\\1"+escapedchars[i], v)
        if v[1] == '"':
            v = v[2:-2]
        return (v[1:-1], "str")
    elif v[0] == "'":
        if v[1] == "'":
            v = v[2:-2]
        return (v[1:-1], "str")
    elif v[0] == '[':
        return (load_array(v), "array")
    elif len(v) == 20 and v[-1] == 'Z':
        if v[10] == 'T':
            return (datetime.datetime.strptime(v, "%Y-%m-%dT%H:%M:%SZ"), "date")
    else:
-            raise Exception("Wait, what?")
+        parsed_date = load_date(v)
        if parsed_date != None:
            return (parsed_date, "date")
        else:
            itype = "int"
            digits = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
@ -270,7 +458,7 @@ def load_value(v):
            if v[0] == '-':
                neg = True
                v = v[1:]
-        if '.' in v:
+            if '.' in v or 'e' in v:
                if v.split('.', 1)[1] == '':
                    raise Exception("This float is missing digits after the point")
                if v[0] not in digits:
@ -283,7 +471,6 @@ def load_value(v):
                return (0 - v, itype)
            return (v, itype)
 def load_array(a):
    atype = None
    retval = []
@ -363,6 +550,12 @@ def dump_sections(o, sup):
    retdict = {}
    arraystr = ""
    for section in o:
        qsection = section
        if not re.match(r'^[A-Za-z0-9_-]+$', section):
            if '"' in section:
                qsection = "'" + section + "'"
            else:
                qsection = '"' + section + '"'
        if not isinstance(o[section], dict):
            arrayoftables = False
            if isinstance(o[section], list):
@ -372,8 +565,8 @@ def dump_sections(o, sup):
            if arrayoftables:
                for a in o[section]:
                    arraytabstr = ""
-                    arraystr += "[["+sup+section+"]]\n"
+                    arraystr += "[["+sup+qsection+"]]\n"
-                    s, d = dump_sections(a, sup+section)
+                    s, d = dump_sections(a, sup+qsection)
                    if s:
                        if s[0] == "[":
                            arraytabstr += s
@ -382,18 +575,20 @@ def dump_sections(o, sup):
                    while d != {}:
                        newd = {}
                        for dsec in d:
-                            s1, d1 = dump_sections(d[dsec], sup+section+dsec)
+                            s1, d1 = dump_sections(d[dsec], sup+qsection+"."+dsec)
                            if s1:
-                                arraytabstr += "["+sup+section+"."+dsec+"]\n"
+                                arraytabstr += "["+sup+qsection+"."+dsec+"]\n"
                                arraytabstr += s1
                            for s1 in d1:
                                newd[dsec+"."+s1] = d1[s1]
                        d = newd
                    arraystr += arraytabstr
            else:
-                retstr += section + " = " + str(dump_value(o[section])) + '\n'
+                if o[section] is not None:
                    retstr += (qsection + " = " +
                               str(dump_value(o[section])) + '\n')
        else:
-            retdict[section] = o[section]
+            retdict[qsection] = o[section]
    retstr += arraystr
    return (retstr, retdict)
@ -415,17 +610,22 @@ def dump_value(v):
        retval += "]"
        return retval
    if isinstance(v, (str, unicode)):
-        escapes = ['\\', '0', 'b', 'f', '/', 'n', 'r', 't', '"']
+        v = "%r" % v
-        escapedchars = ['\\', '\0', '\b', '\f', '/', '\n', '\r', '\t', '\"']
+        if v[0] == 'u':
-        for i in range(len(escapes)):
+            v = v[1:]
-            v = v.replace(escapedchars[i], "\\"+escapes[i])
+        singlequote = v[0] == "'"
        v = v[1:-1]
        if singlequote:
            v = v.replace("\\'", "'")
            v = v.replace('"', '\\"')
        v = v.replace("\\x", "\\u00")
        return str('"'+v+'"')
    if isinstance(v, bool):
        return str(v).lower()
    if isinstance(v, datetime.datetime):
        return v.isoformat()[:19]+'Z'
    if isinstance(v, float):
-        return '{0:f}'.format(decimal.Decimal(str(v)))
+        return str(v)
    return v
 def toml_merge_dict(a, b):