2014-03-24 16:32:24 -05:00
|
|
|
import re
|
2024-08-09 19:12:09 +08:00
|
|
|
|
|
|
|
|
from mal_types import (_symbol, _keyword, _list, List, Vector, Hash_Map, asPairs)
|
2014-03-24 16:32:24 -05:00
|
|
|
|
|
|
|
|
class Blank(Exception):
    """Signals that the reader was handed input containing no tokens."""
|
|
|
|
|
|
|
|
|
|
class Reader:
    """Cursor over a list of tokens.

    Attributes:
        tokens: the token sequence being read.
        position: index of the next token to hand out.
    """

    def __init__(self, tokens, position=0):
        """Store the token sequence and the starting cursor index."""
        self.position = position
        self.tokens = tokens

    def next(self):
        """Advance the cursor by one and return the token it was pointing at.

        The cursor is moved before the token is looked up, so it is advanced
        even when the lookup fails with IndexError past the end.
        """
        index = self.position
        self.position = index + 1
        return self.tokens[index]

    def peek(self):
        """Return the token under the cursor without advancing, or None at the end."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
|
|
|
|
|
|
|
|
|
|
def tokenize(str):
    """Split source text into a list of token strings, dropping comments.

    Leading whitespace and commas are skipped before each token. Tokens that
    start with ';' (comments running to the end of the line) are discarded.
    """
    tre = re.compile(r"""[\s,]*(~@|[\[\]{}()'`~^@]|"(?:[\\].|[^\\"])*"?|;.*|[^\s\[\]{}()'"`@,;]+)""")
    tokens = []
    for lexeme in tre.findall(str):
        # Every match is non-empty, so a prefix test equals checking lexeme[0].
        if lexeme.startswith(';'):
            continue
        tokens.append(lexeme)
    return tokens
|
|
|
|
|
|
2015-10-30 22:05:49 -05:00
|
|
|
def _unescape(s):
|
2024-08-09 19:12:09 +08:00
|
|
|
return s.replace('\\\\', '\b').replace('\\"', '"').replace('\\n', '\n').replace('\b', '\\')
|
2015-10-30 22:05:49 -05:00
|
|
|
|
2014-03-24 16:32:24 -05:00
|
|
|
# Token classifiers used by read_atom, compiled once at import time.
_INT_RE = re.compile(r"-?[0-9]+$")
_FLOAT_RE = re.compile(r"-?[0-9][0-9.]*$")
_STRING_RE = re.compile(r'"(?:[\\].|[^\\"])*"')


def read_atom(reader):
    """Consume one token from *reader* and convert it to a scalar value.

    Returns:
        int for integer literals, float for decimal literals, an unescaped
        str for quoted strings, the result of ``_keyword`` for tokens starting
        with ':', None/True/False for ``nil``/``true``/``false``, and the
        result of ``_symbol`` for anything else.

    Raises:
        Exception: the token opens a string with '"' but never closes it.
        ValueError: the token looks numeric but is malformed (e.g. ``1.2.3``).
    """
    token = reader.next()

    if _INT_RE.match(token):
        return int(token)
    if _FLOAT_RE.match(token):
        # Decimal literals must be converted with float(); int() rejects them.
        return float(token)
    if _STRING_RE.match(token):
        # Strip the surrounding quotes before decoding escapes.
        return _unescape(token[1:-1])
    if token.startswith('"'):
        raise Exception("expected '\"', got EOF")
    if token.startswith(':'):
        return _keyword(token[1:])
    if token == "nil":
        return None
    if token == "true":
        return True
    if token == "false":
        return False
    return _symbol(token)
|
2014-03-24 16:32:24 -05:00
|
|
|
|
2024-08-09 19:12:09 +08:00
|
|
|
def read_sequence(reader, start='(', end=')'):
    """Generate the forms found between a *start* and an *end* delimiter.

    This is a generator: nothing is read from *reader* until iteration
    begins. The opening token is consumed first, then one form is yielded
    per iteration, and the closing token is consumed once it is reached.

    Raises:
        Exception: the first token is not *start*, or the tokens run out
            before *end* is seen.
    """
    if reader.next() != start:
        raise Exception("expected '" + start + "'")

    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        # peek() gives a falsy value once the tokens are exhausted.
        if not upcoming:
            raise Exception("expected '" + end + "', got EOF")
        yield read_form(reader)

    # Step over the closing delimiter.
    reader.next()
|
|
|
|
|
|
|
|
|
|
def read_hash_map(reader):
    """Read a '{' ... '}' delimited sequence and build a Hash_Map from it.

    The forms between the braces are passed through asPairs before being
    handed to Hash_Map.
    """
    return Hash_Map(asPairs(read_sequence(reader, '{', '}')))
|
2014-03-24 16:32:24 -05:00
|
|
|
|
|
|
|
|
def read_list(reader):
    """Read a '(' ... ')' delimited sequence and wrap its forms in a List."""
    forms = read_sequence(reader, '(', ')')
    return List(forms)
|
2014-03-24 16:32:24 -05:00
|
|
|
|
|
|
|
|
def read_vector(reader):
    """Read a '[' ... ']' delimited sequence and wrap its forms in a Vector."""
    forms = read_sequence(reader, '[', ']')
    return Vector(forms)
|
2014-03-24 16:32:24 -05:00
|
|
|
|
|
|
|
|
# Single-token reader macros: each expands to (<symbol> <next form>).
_PREFIX_MACROS = {
    '\'': 'quote',
    '`': 'quasiquote',
    '~': 'unquote',
    '~@': 'splice-unquote',
    '@': 'deref',
}


def read_form(reader):
    """Read one complete form starting at the reader's current token.

    Dispatches on the upcoming token: reader macros are expanded into the
    corresponding list form, opening delimiters hand off to the list, vector
    or hash-map readers, and everything else is read as an atom.

    Returns:
        The form that was read. A comment token is consumed and yields None.

    Raises:
        Exception: the tokens are exhausted where a form is required (for
            example after a lone quote), or a closing delimiter appears with
            no matching opener.
    """
    token = reader.peek()

    # peek() returns None at end of input; fail with a clear message rather
    # than a TypeError from indexing None.
    if token is None:
        raise Exception("unexpected EOF")

    # reader macros/transforms
    if token.startswith(';'):
        reader.next()
        return None
    if token in _PREFIX_MACROS:
        reader.next()
        return _list(_symbol(_PREFIX_MACROS[token]), read_form(reader))
    if token == '^':
        reader.next()
        # The metadata is written first but goes last in the expansion.
        meta = read_form(reader)
        return _list(_symbol('with-meta'), read_form(reader), meta)

    # stray closing delimiters
    if token in (')', ']', '}'):
        raise Exception("unexpected '" + token + "'")

    # list
    if token == '(':
        return read_list(reader)

    # vector
    if token == '[':
        return read_vector(reader)

    # hash-map
    if token == '{':
        return read_hash_map(reader)

    # atom
    return read_atom(reader)
|
|
|
|
|
|
|
|
|
|
def read_str(str):
    """Tokenize the source text and read the first form from it.

    Raises:
        Blank: the text produced no tokens.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    reader = Reader(tokens)
    return read_form(reader)
|