This is sort of a follow up to How to parse a nested structure presented as a flat list?. Read it for the long explanation.
My current code:
def parse(code: list, top=True) -> tuple:
print(code)
result = []
stack = []
i = 0
while i < len(code):
nonzero_tokens = []
for index, token in enumerate(code[i:], start=i):
if not top or (token[0] != 0 or (len(code) <= index + 1 or code[index + 1][0] == 0)):
nonzero_tokens.append((token, index))
if len(nonzero_tokens) == 4:
break
key1, key2, key3, pos = nonzero_tokens
if stack:
if top and (len(code) > pos[1] + 1 and code[pos[1] + 1][0] == 0) and (len(code) <= pos[1] + 2 or code[pos[1] + 2][0] != 0):
result[-1][-1].append(key1[0])
i = key1[1] + 1
elif pos[0][0] == 1:
stack.append((key1[0][0], key2[0][0], key3[0][0]))
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
i = pos[1] + 1
elif (key1[0][0], key2[0][0], key3[0][0]) == stack[-1]:
if pos[0][0] == -1:
if len(stack) > 1:
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
stack.pop()
i = pos[1] + 1
else:
if len(stack) == 1:
result[-1].append([pos[0]])
else:
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
i = pos[1] + 1
else:
result[-1][-1].append(key1[0])
i = key1[1] + 1
else:
if pos[0][0] == 1:
result.append([(key1[0], key2[0], key3[0]), [1]])
stack.append((key1[0][0], key2[0][0], key3[0][0]))
i = pos[1] + 1
else:
raise SyntaxError(f'function {key1[0][0]}.{key2[0][0]}.{key3[0][0]} at line {key1[0][1][0]}, position {key1[0][1][1]} has no opening instruction {key1[0][0]}.{key2[0][0]}.{key3[0][0]}.1')
for i, call in enumerate(result):
new_call = [call[0], {}]
for argument in call[1:]:
try:
new_call[1][argument[0]] = Parser.parse(argument[1:], top=False)[0]
except:
new_call[1][argument[0]] = argument[1:]
result[i] = new_call
return result, stack
Example input:
[(3, (1, 1)), (2, (1, 3)), (1, (1, 5)), (1, (1, 7)), (5, (2, 2)), (3, (2, 4)), (1, (2, 6)), (1, (2, 8)), (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68)), (5, (3, 2)), (3, (3, 4)), (1, (3, 6)), (0, (3, 8)), (-1, (3, 11)), (3, (4, 1)), (2, (4, 3)), (1, (4, 5)), (-1, (4, 8))]
Source file for reference:
3.2.1.1 = Start of print
5.3.1.1 = 72, 101, 108, 108, 111, 44, 32, 87, 111, 114, 108, 109, 33 Makes the string "Hello, World!"
5.3.1.0.-1 End of string
3.2.1.-1 ^ End of print
# Escape characters so that 33.5.3.1 is not interpreted as a function call
^ Comment marker because the above comment contains numbers
Expected output:
[[((3, (1, 1)), (2, (1, 3)), (1, (1, 5))), {1: [[((5, (2, 2)), (3, (2, 4)), (1, (2, 6))), {1: (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68))}]]}]]
Actual output:
[[((3, (1, 1)), (2, (1, 3)), (1, (1, 5))), {1: [(5, (2, 2)), (3, (2, 4)), (1, (2, 6)), (1, (2, 8)), (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68)), (5, (3, 2)), (3, (3, 4)), (1, (3, 6)), (-1, (3, 11))]}]]
Functions have a three number identifier followed by a position indicator. A position of -1 closes the function. My function parses the top layer of functions, then passes the arguments of each to itself recursively to parse nested functions.
Zero is the escape character; any function-like sequence followed by a zero will not be treated as a function. Zeros are otherwise skipped. Two zeros (0 0) escapes a zero so it will not escape any functions and be treated as a single regular data 0.
Basically, as you can see, there is a zero to escape 33.5.3.1, but that zero is not passed to the recursive call. In general, escaping zeros are processed once by the top function, removed, and then no longer work in the recursive calls. I don't know how I can fix this. There are already several different fixes layered onto this overtly complex function. Please help.
This is sort of a follow up to How to parse a nested structure presented as a flat list?. Read it for the long explanation.
My current code:
def parse(code: list, top=True) -> tuple:
print(code)
result = []
stack = []
i = 0
while i < len(code):
nonzero_tokens = []
for index, token in enumerate(code[i:], start=i):
if not top or (token[0] != 0 or (len(code) <= index + 1 or code[index + 1][0] == 0)):
nonzero_tokens.append((token, index))
if len(nonzero_tokens) == 4:
break
key1, key2, key3, pos = nonzero_tokens
if stack:
if top and (len(code) > pos[1] + 1 and code[pos[1] + 1][0] == 0) and (len(code) <= pos[1] + 2 or code[pos[1] + 2][0] != 0):
result[-1][-1].append(key1[0])
i = key1[1] + 1
elif pos[0][0] == 1:
stack.append((key1[0][0], key2[0][0], key3[0][0]))
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
i = pos[1] + 1
elif (key1[0][0], key2[0][0], key3[0][0]) == stack[-1]:
if pos[0][0] == -1:
if len(stack) > 1:
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
stack.pop()
i = pos[1] + 1
else:
if len(stack) == 1:
result[-1].append([pos[0]])
else:
result[-1][-1] += [key1[0], key2[0], key3[0], pos[0]]
i = pos[1] + 1
else:
result[-1][-1].append(key1[0])
i = key1[1] + 1
else:
if pos[0][0] == 1:
result.append([(key1[0], key2[0], key3[0]), [1]])
stack.append((key1[0][0], key2[0][0], key3[0][0]))
i = pos[1] + 1
else:
raise SyntaxError(f'function {key1[0][0]}.{key2[0][0]}.{key3[0][0]} at line {key1[0][1][0]}, position {key1[0][1][1]} has no opening instruction {key1[0][0]}.{key2[0][0]}.{key3[0][0]}.1')
for i, call in enumerate(result):
new_call = [call[0], {}]
for argument in call[1:]:
try:
new_call[1][argument[0]] = Parser.parse(argument[1:], top=False)[0]
except:
new_call[1][argument[0]] = argument[1:]
result[i] = new_call
return result, stack
Example input:
[(3, (1, 1)), (2, (1, 3)), (1, (1, 5)), (1, (1, 7)), (5, (2, 2)), (3, (2, 4)), (1, (2, 6)), (1, (2, 8)), (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68)), (5, (3, 2)), (3, (3, 4)), (1, (3, 6)), (0, (3, 8)), (-1, (3, 11)), (3, (4, 1)), (2, (4, 3)), (1, (4, 5)), (-1, (4, 8))]
Source file for reference:
3.2.1.1 = Start of print
5.3.1.1 = 72, 101, 108, 108, 111, 44, 32, 87, 111, 114, 108, 109, 33 Makes the string "Hello, World!"
5.3.1.0.-1 End of string
3.2.1.-1 ^ End of print
# Escape characters so that 33.5.3.1 is not interpreted as a function call
^ Comment marker because the above comment contains numbers
Expected output:
[[((3, (1, 1)), (2, (1, 3)), (1, (1, 5))), {1: [[((5, (2, 2)), (3, (2, 4)), (1, (2, 6))), {1: (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68))}]]}]]
Actual output:
[[((3, (1, 1)), (2, (1, 3)), (1, (1, 5))), {1: [(5, (2, 2)), (3, (2, 4)), (1, (2, 6)), (1, (2, 8)), (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68)), (5, (3, 2)), (3, (3, 4)), (1, (3, 6)), (-1, (3, 11))]}]]
Functions have a three number identifier followed by a position indicator. A position of -1 closes the function. My function parses the top layer of functions, then passes the arguments of each to itself recursively to parse nested functions.
Zero is the escape character; any function-like sequence followed by a zero will not be treated as a function. Zeros are otherwise skipped. Two zeros (0 0) escapes a zero so it will not escape any functions and be treated as a single regular data 0.
Basically, as you can see, there is a zero to escape 33.5.3.1, but that zero is not passed to the recursive call. In general, escaping zeros are processed once by the top function, removed, and then no longer work in the recursive calls. I don't know how I can fix this. There are already several different fixes layered onto this overtly complex function. Please help.
Share Improve this question edited Mar 19 at 15:04 Eric Wang asked Mar 18 at 17:41 Eric WangEric Wang 494 bronze badges 12 | Show 7 more comments1 Answer
Reset to default 1Your code was difficult to follow for me, so I started from scratch. I chose to work with an iterator to go through the input, and I have used a small queue (with 5 entries) to look ahead to see if there is a valid function combination that is not escaped.
I have defined a class Func
for function occurrences.
Here is the code:
from itertools import islice, chain, repeat
class LookaheadQueue(list):
def __init__(self, iterator, maxsize, default):
super().__init__()
self.maxsize = maxsize
self.iterator = chain(iterator, repeat(default))
self.default = default
self.take(0) # populate this instance
def take(self, size=1, at=0):
value = self[at:at+size]
del self[at:at+size] # remove the value(s) that are taken..
# and read more values from the iterator to fill up the queue to its capacity
self.extend(islice(self.iterator, self.maxsize - len(self)))
return value
class Func:
def __init__(self, items):
self.keys = tuple(items[:3])
self.arg_pos = items[3][0]
self.location = "line {}, position {}".format(*items[0][1])
self.name = ".".join((repr(key[0]) for key in self.keys))
def __eq__(self, other):
return isinstance(other, Func) and other.name == self.name
def __repr__(self):
return f"function {self.name}"
def parse(code):
empty = (None, (None, None)) # constant to pad the buffer with
items = LookaheadQueue(iter(code), 5, empty) # A buffer so we can look-ahead a bit
def recur(f):
argument = []
arguments = { 1: argument }
while items[0] != empty:
f2 = Func(items) # try to read the next 4 items as function/pos
if items[4][0] == 0 or (f2 != f and f2.arg_pos != 1): # escaped or not a new function
if not f: # if top level...
# ...it must consist of functions only (no argument-content)
raise SyntaxError(f'Function call expected, but {f} at {f.location} is either escaped or has no opening instruction {f}.1')
if items[4][0] == 0: # remove escape code if present
items.take(1, 4)
argument.extend(items.take(1)) # just take one item as part of the current argument
continue
items.take(4) # it was a function, so consume the four items
if f2.arg_pos == 1: # it's a nested function: use recursion
argument.append(recur(f2))
continue
# f and f2 are equal
if f2.arg_pos == -1: # this denotes the end of the current function
return [f.keys, arguments]
if f2.arg_pos in arguments: # same argument number is duplicated
raise ValueError(f'{f2} at {f2.location} has duplicated an argument {f2}.{f2.arg_pos}')
argument = [] # a new argument for the current function
arguments[f2.arg_pos] = argument
if f:
raise SyntaxError(f"{f} was not terminated with {f}.-1")
return argument
return recur(None)
Call on the example input:
code = [(3, (1, 1)), (2, (1, 3)), (1, (1, 5)), (1, (1, 7)), (5, (2, 2)), (3, (2, 4)), (1, (2, 6)), (1, (2, 8)), (72, (2, 12)), (101, (2, 16)), (108, (2, 21)), (108, (2, 26)), (111, (2, 31)), (44, (2, 36)), (32, (2, 40)), (87, (2, 44)), (111, (2, 48)), (114, (2, 53)), (108, (2, 58)), (109, (2, 63)), (33, (2, 68)), (5, (3, 2)), (3, (3, 4)), (1, (3, 6)), (0, (3, 8)), (-1, (3, 11)), (3, (4, 1)), (2, (4, 3)), (1, (4, 5)), (-1, (4, 8))]
result = parse(code)
print(result)
This produces the output as specified in your question.
33.5.3.1 = 0, ...
Then to me it seems that zero would be data for the first argument of the33.5.3
function, and not an escape. How will you distinguish an escaping 0 from a data 0? – trincot Commented Mar 19 at 13:13