aboutsummaryrefslogtreecommitdiff
path: root/tools/cru-py/cru/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/cru-py/cru/parsing.py')
-rw-r--r--tools/cru-py/cru/parsing.py192
1 files changed, 192 insertions, 0 deletions
diff --git a/tools/cru-py/cru/parsing.py b/tools/cru-py/cru/parsing.py
index 1d2fa7f..c31ce35 100644
--- a/tools/cru-py/cru/parsing.py
+++ b/tools/cru-py/cru/parsing.py
@@ -1,6 +1,8 @@
from __future__ import annotations
from abc import ABCMeta, abstractmethod
+from dataclasses import dataclass
+from enum import Enum
from typing import NamedTuple, TypeAlias, TypeVar, Generic, NoReturn, Callable
from ._error import CruException
@@ -9,6 +11,102 @@ from ._iter import CruIterable
_T = TypeVar("_T")
class StrParseStream:
    """Forward-only cursor over a string that tracks offset and line number.

    The stream also keeps a stack of saved cursor snapshots ("mem stack") so
    that a parser can mark the start of a token with :meth:`push_mem` and
    later retrieve the text consumed since then with :meth:`pop_mem_str`.
    """

    class MemStackEntry(NamedTuple):
        """Cursor snapshot (offset and line number) saved by ``push_mem``."""

        pos: int
        lineno: int

    class MemStackPopStr(NamedTuple):
        """Text consumed since a snapshot and the line the snapshot was on."""

        text: str
        lineno: int

    def __init__(self, text: str) -> None:
        """Create a stream positioned at offset 0, line 1 of ``text``."""
        self._text = text
        self._pos = 0
        self._lineno = 1
        self._length = len(self._text)
        # Offset ``length`` (one past the last character) is valid: it is EOF.
        self._valid_pos_range = range(0, self.length + 1)
        self._valid_offset_range = range(-self.length, self.length + 1)
        self._mem_stack: CruIterable.IterList[StrParseStream.MemStackEntry] = (
            CruIterable.IterList()
        )

    @property
    def text(self) -> str:
        """The full text being parsed."""
        return self._text

    @property
    def length(self) -> int:
        """Length of the full text."""
        return self._length

    @property
    def valid_pos_range(self) -> range:
        """``range(0, length + 1)``: every offset from the start to EOF."""
        return self._valid_pos_range

    @property
    def valid_offset_range(self) -> range:
        """``range(-length, length + 1)``."""
        return self._valid_offset_range

    @property
    def pos(self) -> int:
        """Current cursor offset into the text."""
        return self._pos

    @property
    def lineno(self) -> int:
        """Current line number; starts at 1, bumped per consumed newline."""
        return self._lineno

    @property
    def eof(self) -> bool:
        """Whether the cursor is at the end of the text."""
        return self._pos == self.length

    def peek(self, length: int) -> str:
        """Return up to ``length`` characters at the cursor without consuming.

        Fewer characters are returned when the text ends first. A zero or
        negative ``length`` yields an empty string.
        """
        # Guard explicitly: a negative length would otherwise produce a
        # negative slice end, which wraps around and returns unrelated text.
        if length <= 0:
            return ""
        # Slicing already clamps at the end of the text.
        return self._text[self._pos : self._pos + length]

    def read(self, length: int) -> str:
        """Consume and return up to ``length`` characters, updating lineno."""
        text = self.peek(length)
        self._pos += len(text)
        self._lineno += text.count("\n")
        return text

    def skip(self, length: int) -> None:
        """Consume up to ``length`` characters and discard them."""
        self.read(length)

    def peek_str(self, text: str) -> bool:
        """Return whether ``text`` occurs exactly at the cursor."""
        return self._text.startswith(text, self._pos)

    def read_str(self, text: str) -> bool:
        """Consume ``text`` if it occurs at the cursor.

        Returns:
            True if ``text`` matched and was consumed; False (cursor
            unchanged) otherwise.
        """
        if not self.peek_str(text):
            return False
        self._pos += len(text)
        self._lineno += text.count("\n")
        return True

    @property
    def mem_stack(self) -> CruIterable.IterList[MemStackEntry]:
        """The stack of saved cursor snapshots."""
        return self._mem_stack

    def push_mem(self) -> None:
        """Save the current offset and line number on the mem stack."""
        self.mem_stack.append(self.MemStackEntry(self.pos, self.lineno))

    def pop_mem(self) -> MemStackEntry:
        """Remove and return the most recently saved snapshot."""
        return self.mem_stack.pop()

    def pop_mem_str(self, strip_end: int = 0) -> MemStackPopStr:
        """Pop the latest snapshot and return the text consumed since it.

        Args:
            strip_end: Number of trailing characters to drop from the
                returned text (e.g. a delimiter that was just consumed).

        Returns:
            The consumed text (minus ``strip_end`` trailing characters) and
            the line number recorded in the snapshot.
        """
        old = self.pop_mem()
        # Invariant: the cursor only ever moves forward.
        assert self.pos >= old.pos
        # Never let the slice end fall before the snapshot; otherwise a large
        # strip_end could go negative and wrap around to unrelated text.
        end = max(old.pos, self.pos - strip_end)
        return self.MemStackPopStr(self._text[old.pos : end], old.lineno)
+
+
class ParseError(CruException, Generic[_T]):
def __init__(
self,
@@ -96,3 +194,97 @@ class SimpleLineConfigParser(Parser[SimpleLineConfigParserResult]):
result = SimpleLineConfigParserResult()
self._parse(text, lambda item: result.append(item))
return result
+
+
class _StrWrapperVarParserTokenKind(Enum):
    """Kinds of token a wrapper-variable parse can yield."""

    # Literal text found outside any wrapper pair.
    TEXT = "TEXT"
    # Content found between an opening and a closing wrapper.
    VAR = "VAR"
+
+
@dataclass
class _StrWrapperVarParserToken:
    """One token of parsed text: its kind, its content and its start line."""

    kind: _StrWrapperVarParserTokenKind
    value: str
    line_number: int

    @property
    def is_text(self) -> bool:
        """Whether this token is of kind TEXT."""
        return self.kind is _StrWrapperVarParserTokenKind.TEXT

    @property
    def is_var(self) -> bool:
        """Whether this token is of kind VAR."""
        return self.kind is _StrWrapperVarParserTokenKind.VAR

    @staticmethod
    def from_mem_str(
        kind: _StrWrapperVarParserTokenKind, mem_str: StrParseStream.MemStackPopStr
    ) -> _StrWrapperVarParserToken:
        """Build a token of ``kind`` from a stream's popped text and line."""
        return _StrWrapperVarParserToken(
            kind=kind,
            value=mem_str.text,
            line_number=mem_str.lineno,
        )

    def __repr__(self) -> str:
        # Only variable tokens show their content; text is abbreviated.
        if self.is_var:
            return f"VAR: {self.value}"
        return "TEXT: ..."
+
+
class _StrWrapperVarParserResult(CruIterable.IterList[_StrWrapperVarParserToken]):
    """List of ``_StrWrapperVarParserToken`` items returned by the parser."""
+
+
class StrWrapperVarParser(Parser[_StrWrapperVarParserResult]):
    """Split text into literal segments and wrapper-delimited variables.

    Every occurrence of ``wrapper`` toggles between literal text and a
    variable: with wrapper ``"$"``, ``"a $b$ c"`` yields TEXT ``"a "``,
    VAR ``"b"``, TEXT ``" c"``.
    """

    TokenKind: TypeAlias = _StrWrapperVarParserTokenKind
    Token: TypeAlias = _StrWrapperVarParserToken
    Result: TypeAlias = _StrWrapperVarParserResult

    def __init__(self, wrapper: str):
        """Create a parser using ``wrapper`` as the variable delimiter.

        Raises:
            ValueError: If ``wrapper`` is empty. An empty delimiter matches
                at every position without consuming input, so parsing
                could never make progress.
        """
        if not wrapper:
            raise ValueError("wrapper must not be empty.")
        super().__init__(f"StrWrapperVarParser({wrapper})")
        self._wrapper = wrapper

    @property
    def wrapper(self) -> str:
        """The delimiter string that opens and closes a variable."""
        return self._wrapper

    def parse(self, text: str) -> Result:
        """Tokenize ``text`` into TEXT and VAR tokens.

        A TEXT token is emitted at every opening wrapper even when the
        preceding text is empty (e.g. at the very start, or between two
        adjacent variables); only the trailing text after the last closing
        wrapper is dropped when empty. Each token carries the line number
        on which its segment starts.

        Raises:
            ParseError: If the text ends inside an unclosed variable.
        """
        result = self.Result()
        wrapper = self.wrapper
        wrapper_length = len(wrapper)

        stream = StrParseStream(text)
        in_var = False
        # Mark the start of the first (text) segment.
        stream.push_mem()

        while not stream.eof:
            if not stream.read_str(wrapper):
                stream.skip(1)
                continue

            # A wrapper closes the current segment; strip the wrapper itself
            # (already consumed) from the end of the segment's text.
            kind = self.TokenKind.VAR if in_var else self.TokenKind.TEXT
            segment = stream.pop_mem_str(wrapper_length)
            result.append(self.Token.from_mem_str(kind, segment))

            in_var = not in_var
            # Mark the start of the next segment, just after the wrapper.
            stream.push_mem()

        if in_var:
            raise ParseError("Text ended without closing variable.", self, text)

        tail = stream.pop_mem_str()
        if len(tail.text) != 0:
            result.append(self.Token.from_mem_str(self.TokenKind.TEXT, tail))

        return result