advent-2022-python/advent/parser/parser.py
2022-12-10 19:30:10 +01:00

437 lines
14 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from functools import reduce
from itertools import chain
from typing import Any, Callable, Generic, Iterator, Self, TypeVar, overload
import unicodedata
from .result import Result
# Type variables used by the generic parser combinators in this module:
# T is the value type of a parser, T1..T5 are the value types of positional
# parser arguments, and TR is the result type of a mapping/binding function.
T = TypeVar('T')
T1 = TypeVar('T1')
T2 = TypeVar('T2')
T3 = TypeVar('T3')
T4 = TypeVar('T4')
T5 = TypeVar('T5')
TR = TypeVar('TR')
@dataclass(slots=True, frozen=True)
class ParserInput:
input: str
start: int
def step(self) -> tuple[Self, str]:
if self.start >= len(self.input):
raise Exception("Already at End of Input")
return ParserInput(self.input, self.start + 1), self.input[self.start]
def has_data(self) -> bool:
return self.start < len(self.input)
def __repr__(self) -> str:
if self.start == 0:
return f'->[{self.input}]'
if self.start >= len(self.input):
return f'{self.input}'
if self.start < 3:
return f'{self.input[0:self.start-1]}->[{self.input[self.start:]}]'
return f'{self.input[self.start-3:self.start-1]}->[{self.input[self.start:]}]'
# A parse result is a stream of (remaining input, parsed value) pairs; an
# empty stream means the parser did not match.
ParserResult = Iterator[tuple[ParserInput, T]]
# A parser function maps an input position to its stream of results.
ParserFunc = Callable[[ParserInput], ParserResult[T]]
class P(Generic[T]):
def __init__(self, func: ParserFunc[T]):
self.func = func
def parse(self, s: str, i: int = 0) -> Result[T]:
all_results = self.func(ParserInput(s, i))
try:
_, result = next(all_results)
return Result.of(result)
except StopIteration:
return Result.fail("No result")
def parse_multi(self, s: str, i: int = 0) -> Iterator[T]:
return (v for _, v in self.func(ParserInput(s, i)))
@classmethod
def pure(cls, value: T) -> P[T]:
return P(lambda pp: iter([(pp, value)]))
@classmethod
def fail(cls) -> P[Any]:
return P(lambda _: iter([]))
@classmethod
def _fix(cls, p1: Callable[[P[Any]], P[T]]) -> P[T]:
""" Not really nice helper function, but it works"""
return [p._forward(q.func) for p in [P(None)] for q in [p1(p)]][0] # type: ignore
def _forward(self, func: ParserFunc[T]) -> Self:
self.func = func
return self
def bind(self, bind_func: Callable[[T], P[TR]]) -> P[TR]:
def inner(parserPos: ParserInput) -> ParserResult[TR]:
return (r for rs in (bind_func(v).func(pp)
for pp, v in self.func(parserPos)) for r in rs)
return P(inner)
def fmap(self, map_func: Callable[[T], TR]) -> P[TR]:
def inner(parserPos: ParserInput) -> ParserResult[TR]:
return ((pp, map_func(v)) for pp, v in self.func(parserPos))
return P(inner)
def safe_fmap(self, map_func: Callable[[T], TR]) -> P[TR]:
def inner(parserPos: ParserInput) -> ParserResult[TR]:
for pp, v in self.func(parserPos):
try:
yield pp, map_func(v)
except Exception:
pass
return P(inner)
def replace(self, value: TR) -> P[TR]:
return self.fmap(lambda _: value)
def as_unit(self) -> P[tuple[()]]:
return self.fmap(lambda _: ())
def apply(self, p2: P[Callable[[T], TR]]) -> P[TR]:
return self.bind(lambda x: p2.bind(lambda y: P.pure(y(x))))
@classmethod
def first(cls, p1: P[T1], p2: P[Any]) -> P[T1]:
return p1.bind(lambda v1: p2.fmap(lambda _: v1))
@classmethod
def second(cls, p1: P[Any], p2: P[T2]) -> P[T2]:
return p1.bind(lambda _: p2)
def between(self, pre: P[Any], post: P[Any]) -> P[T]:
return P.map3(pre, self, post, lambda _1, v, _2: v)
def some(self) -> P[list[T]]:
return P._fix(lambda p: self.bind(
lambda x: P.either(p, P.pure([])).fmap(lambda ys: [x] + ys)))
def some_lazy(self) -> P[list[T]]:
return P._fix(lambda p: self.bind(
lambda x: P.either(P.pure([]), p).fmap(lambda ys: [x] + ys)))
def many(self) -> P[list[T]]:
return P.either(self.some(), P.pure([]))
def many_lazy(self) -> P[list[T]]:
return P.either(P.pure([]), self.some_lazy())
def satisfies(self, pred: Callable[[T], bool]) -> P[T]:
return self.bind(lambda v: P.pure(v) if pred(v) else P.fail())
def optional(self) -> P[T | None]:
return P.either(self, P.pure(None))
def optional_lazy(self) -> P[T | None]:
return P.either(P.pure(None), self)
@overload
def times(self, *, exact: int) -> P[list[T]]:
...
@overload
def times(self, *, min: int) -> P[list[T]]:
...
@overload
def times(self, *, max: int) -> P[list[T]]:
...
@overload
def times(self, *, min: int, max: int) -> P[list[T]]:
...
def times(self, *, max: int | None = None, min: int | None = None,
exact: int | None = None) -> P[list[T]]:
match (exact, min, max):
case (int(e), None, None):
return self.many().satisfies(lambda lst: len(lst) == e)
case (None, int(mn), None):
return self.many().satisfies(lambda lst: len(lst) >= mn)
case (None, None, int(mx)):
return self.many().satisfies(lambda lst: len(lst) <= mx)
case (None, int(mn), int(mx)):
return self.many().satisfies(lambda lst: mn <= len(lst) <= mx)
case _:
raise Exception("Illegal combination of parameters")
@overload
def times_lazy(self, *, exact: int) -> P[list[T]]:
...
@overload
def times_lazy(self, *, min: int) -> P[list[T]]:
...
@overload
def times_lazy(self, *, max: int) -> P[list[T]]:
...
@overload
def times_lazy(self, *, min: int, max: int) -> P[list[T]]:
...
def times_lazy(self, *, max: int | None = None, min: int | None = None,
exact: int | None = None) -> P[list[T]]:
match (exact, min, max):
case (int(e), None, None):
return self.many_lazy().satisfies(lambda lst: len(lst) == e)
case (None, int(mn), None):
return self.many_lazy().satisfies(lambda lst: len(lst) >= mn)
case (None, None, int(mx)):
return self.many_lazy().satisfies(lambda lst: len(lst) <= mx)
case (None, int(mn), int(mx)):
return self.many_lazy().satisfies(lambda lst: mn <= len(lst) <= mx)
case _:
raise Exception("Illegal combination of parameters")
def sep_by(self, sep: P[Any]) -> P[list[T]]:
return P.map2(self, P.second(sep, self).many(), lambda f, r: [f] + r)
def sep_by_lazy(self, sep: P[Any]) -> P[list[T]]:
return P.map2(self, P.second(sep, self).many_lazy(), lambda f, r: [f] + r)
def no(self) -> P[tuple[()]]:
def inner(parserPos: ParserInput) -> ParserResult[tuple[()]]:
result = self.func(parserPos)
try:
next(result)
# Silently yields nothing so is an empty Generator
except StopIteration:
yield (parserPos, ())
return P(inner)
@classmethod
def map2(cls, p1: P[T1], p2: P[T2], func: Callable[[T1, T2], TR]) -> P[TR]:
return p1.bind(lambda v1: p2.fmap(lambda v2: func(v1, v2)))
@classmethod
def map3(cls, p1: P[T1], p2: P[T2], p3: P[T3], func: Callable[[T1, T2, T3], TR]) -> P[TR]:
return p1.bind(
lambda v1: p2.bind(
lambda v2: p3.fmap(
lambda v3: func(v1, v2, v3))))
@classmethod
def map4(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4],
func: Callable[[T1, T2, T3, T4], TR]) -> P[TR]:
return p1.bind(
lambda v1: p2.bind(
lambda v2: p3.bind(
lambda v3: p4.fmap(
lambda v4: func(v1, v2, v3, v4)))))
@classmethod
def map5(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4], p5: P[T5],
func: Callable[[T1, T2, T3, T4, T5], TR]) -> P[TR]:
return p1.bind(
lambda v1: p2.bind(
lambda v2: p3.bind(
lambda v3: p4.bind(
lambda v4: p5.fmap(
lambda v5: func(v1, v2, v3, v4, v5))))))
@classmethod
@overload
def seq(cls, p1: P[T1], p2: P[T2], /) -> P[tuple[T1, T2]]:
...
@classmethod
@overload
def seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], /) -> P[tuple[T1, T2, T3]]:
...
@classmethod
@overload
def seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4], /) -> P[tuple[T1, T2, T3, T4]]:
...
@classmethod
@overload
def seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4],
p5: P[T5], /) -> P[tuple[T1, T2, T3, T4, T5]]:
...
@classmethod
def seq(cls, *ps: P[Any]) -> P[tuple[Any, ...]]:
return reduce(lambda p, x: x.bind(
lambda a: p.fmap(lambda b: chain([a], b))),
list(ps)[::-1], P.pure(iter([]))).fmap(tuple)
@classmethod
@overload
def sep_seq(cls, p1: P[T1], p2: P[T2], /, *, sep: P[Any]) -> P[tuple[T1, T2]]:
...
@classmethod
@overload
def sep_seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], /, *, sep: P[Any]) -> P[tuple[T1, T2, T3]]:
...
@classmethod
@overload
def sep_seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4], /,
*, sep: P[Any]) -> P[tuple[T1, T2, T3, T4]]:
...
@classmethod
@overload
def sep_seq(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4],
p5: P[T5], /, *, sep: P[Any]) -> P[tuple[T1, T2, T3, T4, T5]]:
...
@classmethod
def sep_seq(cls, *ps: P[Any], sep: P[Any]) -> P[tuple[Any, ...]]:
first, *rest = list(ps)
return P.map2(first,
reduce(lambda p, x: P.second(sep, x.bind(
lambda a: p.fmap(lambda b: chain([a], b)))),
rest[::-1], P.pure(iter([]))),
lambda f, r: (f,) + tuple(r))
@classmethod
def either(cls, p1: P[T1], p2: P[T2], /) -> P[T1 | T2]:
def inner(parserPos: ParserInput):
yield from p1.func(parserPos)
yield from p2.func(parserPos)
return P(inner)
@classmethod
@overload
def choice(cls, p1: P[T1], p2: P[T2], p3: P[T3], /) -> P[T1 | T2 | T3]:
...
@classmethod
@overload
def choice(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4], /) -> P[T1 | T2 | T3 | T4]:
...
@classmethod
@overload
def choice(cls, p1: P[T1], p2: P[T2], p3: P[T3], p4: P[T4],
p5: P[T5], /) -> P[T1 | T2 | T3 | T4 | T5]:
...
@classmethod
def choice(cls, *ps: P[Any]) -> P[Any]:
def inner(parserPos: ParserInput) -> Iterator[Any]:
for p in ps:
yield from p.func(parserPos)
return P(inner)
@classmethod
def choice2(cls, *ps: P[T]) -> P[T]:
return P.choice(*ps)
@classmethod
def one_char(cls) -> P[str]:
def inner(parserPos: ParserInput) -> ParserResult[str]:
if parserPos.has_data():
yield parserPos.step()
return P(inner)
@classmethod
def eof(cls) -> P[tuple[()]]:
def inner(parserPos: ParserInput) -> ParserResult[tuple[()]]:
if not parserPos.has_data():
yield parserPos, ()
return P(inner)
@classmethod
def char_func(cls, cmp: Callable[[str], bool]) -> P[str]:
return P.one_char().satisfies(cmp)
@classmethod
def is_char(cls, cmp: str) -> P[str]:
return P.char_func(lambda c: c == cmp)
@classmethod
def string(cls, s: str) -> P[str]:
return P.seq(*map(P.is_char, s)).replace(s)
@classmethod
def one_of(cls, s: str) -> P[str]:
return P.char_func(lambda c: c in s)
@classmethod
def any_decimal(cls) -> P[str]:
return P.char_func(lambda c: c.isdecimal())
@classmethod
def is_decimal(cls, num: int) -> P[str]:
return P.any_decimal().satisfies(lambda c: unicodedata.decimal(c) == num)
@classmethod
def is_not_decimal(cls, num: int) -> P[str]:
return P.any_decimal().satisfies(lambda c: unicodedata.decimal(c) != num)
@classmethod
def lower(cls) -> P[str]:
return P.char_func(lambda c: c.islower())
@classmethod
def upper(cls) -> P[str]:
return P.char_func(lambda c: c.isupper())
@classmethod
def space(cls) -> P[str]:
return P.char_func(lambda c: c.isspace())
@classmethod
def word(cls, p1: P[str]) -> P[str]:
return P.first(p1.many().fmap(lambda cs: ''.join(cs)), p1.no())
@classmethod
def unsigned(cls) -> P[int]:
return P.either(P.first(P.is_decimal(0), P.any_decimal().no()),
P.map2(P.is_not_decimal(0), P.word(P.any_decimal()),
lambda f, s: f + s)
).fmap(int)
@classmethod
def signed(cls) -> P[int]:
return P.map2(P.one_of('+-').optional(), P.unsigned(),
lambda sign, num: num if sign != '-' else -num)
def in_parens(self) -> P[T]:
return self.between(P.is_char('('), P.is_char(')'))
def in_angles(self) -> P[T]:
return self.between(P.is_char('<'), P.is_char('>'))
def in_brackets(self) -> P[T]:
return self.between(P.is_char('['), P.is_char(']'))
def in_curleys(self) -> P[T]:
return self.between(P.is_char('{'), P.is_char('}'))
def trim_left(self) -> P[T]:
return P.second(WHITE_SPACE, self)
def trim_right(self) -> P[T]:
return P.first(self, WHITE_SPACE)
def trim(self) -> P[T]:
return self.between(WHITE_SPACE, WHITE_SPACE)
# Zero or more whitespace characters (per str.isspace); the matched text is
# discarded and the empty tuple is yielded.
WHITE_SPACE: P[tuple[()]] = P.space().many().as_unit()
# One or more whitespace characters (per str.isspace); the matched text is
# discarded and the empty tuple is yielded.
SEP_SPACE: P[tuple[()]] = P.space().some().as_unit()