diff --git a/pyproject.toml b/pyproject.toml index 2eb304a..21d57e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "lark>=1", "sphinx>=5.0,<7.0", "textX>=3.0", ] diff --git a/src/plcdoc/documenters.py b/src/plcdoc/documenters.py index 4f67467..1c45f29 100644 --- a/src/plcdoc/documenters.py +++ b/src/plcdoc/documenters.py @@ -2,7 +2,7 @@ import os.path from abc import ABC -from typing import Tuple, List, Dict, Optional, Any, Union +from typing import Tuple, List, Dict, Optional, Any import re from sphinx.util import logging @@ -13,7 +13,7 @@ ) from docutils.statemachine import StringList -from .interpreter import PlcInterpreter, PlcDeclaration, TextXMetaClass +from .interpreter import PlcInterpreter, PlcDeclaration, PlcVariableDeclaration logger = logging.getLogger(__name__) @@ -172,7 +172,7 @@ def format_name(self) -> str: def format_args(self, **kwargs: Any) -> Optional[str]: """Format arguments for signature, based on auto-data.""" - arg_strs = [f"{var.name}" for var in self.object.get_args()] + arg_strs = [f"{var.name}" for var in self.object.args] return "(" + ", ".join(arg_strs) + ")" @@ -205,10 +205,10 @@ def add_content(self, more_content: Optional[StringList]) -> None: # Also add VARs from meta-model args_block = [] - for var in self.object.get_args(): - line_param = f":{var.kind} {var.type.name} {var.name}:" - if var.comment and var.comment.text: - line_param += " " + var.comment.text + for var in self.object.args: + line_param = f":{var.kind} {var.ty} {var.name}:" + if var.comment: + line_param += " " + var.comment args_block.append(line_param) if args_block: @@ -230,7 +230,7 @@ def get_doc(self) -> Optional[List[List[str]]]: """Get docstring from the meta-model.""" # Read main docblock - comment_str = self.object.get_comment() + comment_str = self.object.comment if not comment_str: return [] @@ -393,10 +393,9 @@ def document_members(self, all_members: bool = False) -> None: member_documenters = [ PlcStructMemberDocumenter( self.directive, - member.name, - self.indent, - parent=self.object, member=member, + indent=self.indent, + parent=self.object, ) for member in self.object.members ] @@ -431,12 +430,11 @@ class PlcStructMemberDocumenter(PlcDataDocumenter): def __init__( self, directive, - name: str, + member: PlcVariableDeclaration, indent: str = "", parent: PlcDeclaration = None, - member: Optional[TextXMetaClass] = None, ) -> None: - super().__init__(directive, name, indent) + super().__init__(directive, member.name, indent) self.object = parent self.member = member @@ -444,23 +442,22 @@ def __init__( @classmethod def can_document_member( cls, - member: Union[PlcDeclaration, Any], + member: PlcVariableDeclaration, membername: str, isattr: bool, parent: Any, ) -> bool: - return type(member).__name__ == "Variable" - # Note: a TextX variable class is passed, not a complete PlcDeclaration + return isinstance(member, PlcVariableDeclaration) and member.kind == "member" def import_object(self, raiseerror: bool = False) -> bool: return self.member is not None # Expect member through constructor def get_doc(self) -> Optional[List[List[str]]]: # Read main docblock - if self.member is None or self.member.comment is None: + if self.member is None: return [] - comment_str = self.member.comment.text + comment_str = self.member.comment if not comment_str: return [] @@ -471,7 +468,7 @@ def format_signature(self, **kwargs: Any) -> str: return "" # Insert the known variable type - return f" : 
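Editor's note: the add_content() change above now builds each ":<kind> <type> <name>:" field directly from the plain PlcVariableDeclaration records instead of reaching into TextX objects. A minimal sketch of that mapping; the function-block variables below are invented for illustration only:

from plcdoc.interpreter import PlcVariableDeclaration

# Hypothetical inputs/outputs of a function block, for illustration only.
variables = [
    PlcVariableDeclaration(kind="var_input", name="nSamples", ty="UDINT", comment="Number of samples"),
    PlcVariableDeclaration(kind="var_output", name="fMean", ty="LREAL", comment=""),
]

# Same shape as the documenter's add_content() loop: one field-list line per variable.
for var in variables:
    line_param = f":{var.kind} {var.ty} {var.name}:"
    if var.comment:
        line_param += " " + var.comment
    print(line_param)
# :var_input UDINT nSamples: Number of samples
# :var_output LREAL fMean: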
{self.member.type.name}" + return f" : {self.member.ty}" class PlcFolderDocumenter(PlcDataDocumenter): diff --git a/src/plcdoc/interpreter.py b/src/plcdoc/interpreter.py index 8bf8788..b78734a 100644 --- a/src/plcdoc/interpreter.py +++ b/src/plcdoc/interpreter.py @@ -2,11 +2,17 @@ import os from typing import List, Dict, Optional, Any +from dataclasses import dataclass from glob import glob import logging +from .parsing import parse_new, nodes as ast import xml.etree.ElementTree as ET + from textx import metamodel_from_file, TextXSyntaxError +USE_TEXTX = False +# USE_TEXTX = True + PACKAGE_DIR = os.path.dirname(__file__) logger = logging.getLogger(__name__) @@ -137,42 +143,44 @@ def _parse_file(self, filepath) -> bool: # Name is repeated inside the declaration, use it from there instead # name = item.attrib["Name"] - object_model = self._parse_declaration(item) - if object_model is None: + obj = self._parse_declaration(item, filepath) + if obj is None: continue - obj = PlcDeclaration(object_model, filepath) - # Methods are inside their own subtree with a `Declaration` - simply append # them to the object for node in item: if node.tag in ["Declaration", "Implementation"]: continue - method_model = self._parse_declaration(node) - if method_model is None: + method = self._parse_declaration(node, filepath) + if method is None: continue - method = PlcDeclaration(method_model, filepath) obj.add_child(method) self._add_model(obj) return True - def _parse_declaration(self, item) -> Optional["TextXMetaClass"]: + def _parse_declaration(self, item, filepath) -> Optional["TextXMetaClass"]: declaration_node = item.find("Declaration") if declaration_node is None: return None - try: - meta_model = self._meta_model.model_from_str(declaration_node.text) - return meta_model - except TextXSyntaxError as err: - name = item.attrib.get("Name", "") - logger.error( - "Error parsing node `%s` in file `%s`\n(%s)", - name, - self._active_file, - str(err), - ) + + if USE_TEXTX: + try: + meta_model = self._meta_model.model_from_str(declaration_node.text) + return textx_model_to_declaration(meta_model, filepath) + except TextXSyntaxError as err: + name = item.attrib.get("Name", "") + logger.error( + "Error parsing node `%s` in file `%s`\n(%s)", + name, + self._active_file, + str(err), + ) + else: + node = parse_new(declaration_node.text) + return ast_node_to_plc_declaration(node, filepath) return None @@ -255,6 +263,213 @@ def get_objects_in_folder(self, folder: str) -> List["PlcDeclaration"]: raise KeyError(f"Found no models in the folder `{folder}`") +def ast_node_to_plc_declaration(node, file) -> "PlcDeclaration": + objtype = None + name = None + args = [] + members = [] + comment = "" + + if isinstance(node, ast.Function): + name = node.name + objtype = node.kind + comment = process_comment(node.comment) + for vl in node.variable_lists: + for v in vl.variables: + arg = PlcVariableDeclaration( + kind=vl.kind.lower(), + name=v.name, + ty=ast.type_to_text(v.ty), + comment=v.comment, + ) + args.append(arg) + + elif isinstance(node, ast.TypeDef): + name = node.name + comment = process_comment(node.comment) + if isinstance(node.ty, ast.Struct): + objtype = "struct" + for f in node.ty.fields: + members.append(lark_field_to_var(f)) + elif isinstance(node.ty, ast.Union): + objtype = "union" + elif isinstance(node.ty, ast.Enum): + objtype = "enum" + else: + raise ValueError(f"typedef not supported for type: {node.ty}") + elif isinstance(node, ast.Property): + comment = process_comment(node.comment) + objtype = "property" + 
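Editor's note: with USE_TEXTX left at False, _parse_declaration() now routes every declaration through parse_new() and ast_node_to_plc_declaration(). A minimal end-to-end sketch of that path, assuming the Lark grammar accepts this simplified declaration (FB_Average and its variable are invented):

from plcdoc.parsing import parse_new
from plcdoc.interpreter import ast_node_to_plc_declaration

source = """
(* Computes a running average of its input. *)
FUNCTION_BLOCK FB_Average
VAR_INPUT
    nSamples : UDINT; // Number of samples to average over
END_VAR
"""

node = parse_new(source)                        # -> parsing.nodes.Function
decl = ast_node_to_plc_declaration(node, "FB_Average.TcPOU")

print(decl.name)                                # FB_Average
print(decl.comment.strip())                     # Computes a running average of its input.
print(decl.args[0].kind, decl.args[0].ty)       # var_input UDINT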
name = node.name + elif isinstance(node, ast.VariableList): + if file is None: + raise ValueError("Cannot parse GVL without file as no naming is present") + name = os.path.splitext(os.path.basename(file))[0] + objtype = "variable_list" + else: + raise ValueError(f"Unrecognized declaration in `{node}`") + + assert name is not None + + return PlcDeclaration( + objtype, name=name, comment=comment, args=args, members=members, file=file + ) + + +def lark_field_to_var(field: ast.StructField) -> "PlcVariableDeclaration": + comment = field.comment + ty = ast.type_to_text(field.ty) + return PlcVariableDeclaration( + kind="member", name=field.name, ty=ty, comment=comment + ) + + +def textx_model_to_declaration( + meta_model: TextXMetaClass, file=None +) -> "PlcDeclaration": + objtype = None + name = None + members = [] + + if meta_model.functions: + model = meta_model.functions[0] + objtype = model.function_type.lower().replace("_", "") + + if meta_model.types: + model = meta_model.types[0] + type_str = type(model.type).__name__ + if "Enum" in type_str: + objtype = "enum" + elif "Struct" in type_str: + objtype = "struct" + if model.type: + print(model.type.members) + # aarg + members = [member_to_plc_declaration(m) for m in model.type.members] + elif "Union" in type_str: + objtype = "union" + if model.type: + members = [member_to_plc_declaration(m) for m in model.type.members] + else: + raise ValueError(f"Could not categorize type `{type_str}`") + + if meta_model.properties: + model = meta_model.properties[0] + objtype = "property" + + if meta_model.variable_lists: + if file is None: + raise ValueError("Cannot parse GVL without file as no naming is present") + name = os.path.splitext(os.path.basename(file))[0] + # # GVL are annoying because no naming is present in source - we need to + # # extract it from the file name + + model = meta_model.variable_lists[0] + objtype = "variable_list" + + if objtype is None: + raise ValueError(f"Unrecognized declaration in `{meta_model}`") + + if name is None: + name = model.name + comment = get_comment(model) + args = get_args(model) + + return PlcDeclaration( + objtype, name, comment=comment, args=args, members=members, file=file + ) + + +def member_to_plc_declaration(member) -> "PlcVariableDeclaration": + # print() + name = member.name + comment = member.comment.text if member.comment else "" + ty = member.type.name + return PlcVariableDeclaration( + kind="member", + name=name, + ty=ty, + comment=comment, + ) + + +def get_comment(_model) -> Optional[str]: + """Process main block comment from model into a neat list. + + A list is created for each 'region' of comments. The first comment block above + a declaration is the most common one. 
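Editor's note: the TypeDef/Struct branch above is what feeds the struct documenters: every struct field becomes a PlcVariableDeclaration with kind="member", which is exactly what PlcStructMemberDocumenter.can_document_member() now checks for. A small sketch, assuming the grammar accepts this invented DUT:

from plcdoc.parsing import parse_new
from plcdoc.interpreter import ast_node_to_plc_declaration

source = """
TYPE ST_Point :
STRUCT
    x : LREAL; // Horizontal position
    y : LREAL; // Vertical position
END_STRUCT
END_TYPE
"""

decl = ast_node_to_plc_declaration(parse_new(source), "ST_Point.TcDUT")
for member in decl.members:
    print(member.kind, member.name, member.ty, member.comment)
# member x LREAL Horizontal position
# member y LREAL Vertical position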
+ """ + if hasattr(_model, "comment") and _model.comment is not None: + # Probably a comment line + big_block: str = _model.comment.text + elif hasattr(_model, "comments") and _model.comments: + # Probably a comment block (amongst multiple maybe) + block_comment = None + for comment in reversed(_model.comments): + # Find last block-comment + if type(comment).__name__ == "CommentBlock": + block_comment = comment + break + + if block_comment is None: + return None + + big_block: str = block_comment.text + else: + return None + + big_block = big_block.strip() # Get rid of whitespace + return process_comment(big_block) + + +def process_comment(big_block): + # Remove comment indicators (cannot get rid of them by TextX) + if big_block.startswith("(*"): + big_block = big_block[2:] + if big_block.endswith("*)"): + big_block = big_block[:-2] + + # It looks like Windows line endings are already lost by now, but make sure + big_block = big_block.replace("\r\n", "\n") + + return big_block + + +def get_args(model) -> List: + """Return arguments. + + :param skip_internal: If true, only return in, out and inout variables + :retval: Empty list if there are none or arguments are applicable to this type. + """ + skip_internal = True + if not hasattr(model, "lists"): + return [] + + args = [] + + for var_list in model.lists: + var_kind = var_list.name.lower() + if skip_internal and var_kind not in [ + "var_input", + "var_output", + "var_input_output", + ]: + continue # Skip internal variables `VAR` + + for var in var_list.variables: + print(var, type(var)) + args.append(textx_to_var(var_kind, var)) + + return args + + +def textx_to_var(var_kind, var): + name = var.name + ty = var.type.name + comment = var.comment.text if var.comment else "" + return PlcVariableDeclaration(kind=var_kind, name=name, ty=ty, comment=comment) + + class PlcDeclaration: """Wrapper class for the result of the TextX parsing of a PLC source file. @@ -265,52 +480,19 @@ class PlcDeclaration: The `objtype` is as they appear in :class:`StructuredTextDomain`. 
""" - def __init__(self, meta_model: TextXMetaClass, file=None): + def __init__( + self, objtype: str, name: str, comment=None, args=(), members=(), file=None + ): """ :param meta_model: Parsing result :param file: Path to the file this model originates from """ - self._objtype = None - self._name = None - - if meta_model.functions: - self._model = meta_model.functions[0] - self._objtype = self._model.function_type.lower().replace("_", "") - - if meta_model.types: - self._model = meta_model.types[0] - type_str = type(self._model.type).__name__ - if "Enum" in type_str: - self._objtype = "enum" - elif "Struct" in type_str: - self._objtype = "struct" - elif "Union" in type_str: - self._objtype = "union" - else: - raise ValueError(f"Could not categorize type `{type_str}`") - - if meta_model.properties: - self._model = meta_model.properties[0] - self._objtype = "property" - - if meta_model.variable_lists: - if file is None: - raise ValueError( - "Cannot parse GVL without file as no naming is present" - ) - self._name, _ = os.path.splitext(os.path.basename(file)) - # GVL are annoying because no naming is present in source - we need to - # extract it from the file name - - self._model = meta_model.variable_lists[0] - self._objtype = "variable_list" - - if self._objtype is None: - raise ValueError(f"Unrecognized declaration in `{meta_model}`") - - if self._name is None: - self._name = self._model.name + self._objtype = objtype + self._name = name + self._comment = comment + self._args = args + self._members = members self._file: Optional[str] = file self._children: Dict[str, "PlcDeclaration"] = {} @@ -339,73 +521,23 @@ def children(self) -> Dict[str, "PlcDeclaration"]: @property def members(self) -> List[TextXMetaClass]: - if not self._model.type: - return [] - return self._model.type.members - - def get_comment(self) -> Optional[str]: - """Process main block comment from model into a neat list. + return self._members - A list is created for each 'region' of comments. The first comment block above - a declaration is the most common one. - """ - if hasattr(self._model, "comment") and self._model.comment is not None: - # Probably a comment line - big_block: str = self._model.comment.text - elif hasattr(self._model, "comments") and self._model.comments: - # Probably a comment block (amongst multiple maybe) - block_comment = None - for comment in reversed(self._model.comments): - # Find last block-comment - if type(comment).__name__ == "CommentBlock": - block_comment = comment - break - - if block_comment is None: - return None - - big_block: str = block_comment.text - else: - return None - - big_block = big_block.strip() # Get rid of whitespace - - # Remove comment indicators (cannot get rid of them by TextX) - if big_block.startswith("(*"): - big_block = big_block[2:] - if big_block.endswith("*)"): - big_block = big_block[:-2] - - # It looks like Windows line endings are already lost by now, but make sure - big_block = big_block.replace("\r\n", "\n") - - return big_block - - def get_args(self, skip_internal=True) -> List: - """Return arguments. - - :param skip_internal: If true, only return in, out and inout variables - :retval: Empty list if there are none or arguments are applicable to this type. 
- """ - if not hasattr(self._model, "lists"): - return [] - - args = [] - - for var_list in self._model.lists: - var_kind = var_list.name.lower() - if skip_internal and var_kind not in [ - "var_input", - "var_output", - "var_input_output", - ]: - continue # Skip internal variables `VAR` - - for var in var_list.variables: - var.kind = var_kind - args.append(var) + @property + def comment(self) -> Optional[str]: + return self._comment - return args + @property + def args(self) -> List: + return self._args def add_child(self, child: "PlcDeclaration"): self._children[child.name] = child + + +@dataclass +class PlcVariableDeclaration: + kind: str + name: str + ty: str + comment: str diff --git a/src/plcdoc/parsing/__init__.py b/src/plcdoc/parsing/__init__.py new file mode 100644 index 0000000..99b7fa4 --- /dev/null +++ b/src/plcdoc/parsing/__init__.py @@ -0,0 +1,13 @@ +from .parser import parse_new + + +def parse_str(text: str): + return parse_new(text) + + +def parse_file(filename: str): + with open(filename, "r") as f: + return parse_new(f.read()) + + +__all__ = ["parse_str", "parse_file"] diff --git a/src/plcdoc/parsing/lexer.py b/src/plcdoc/parsing/lexer.py new file mode 100644 index 0000000..d73a20e --- /dev/null +++ b/src/plcdoc/parsing/lexer.py @@ -0,0 +1,217 @@ +""" +PLC lexer. + +""" + +import re + +from dataclasses import dataclass +import lark.lexer + + +class MyLexer(lark.lexer.Lexer): + def __init__(self, lexer_conf): + pass + + def lex(self, source): + # print(code) + # tokens = iter() + # tokens = map(token_filter2, tokens) + # tokens = map(, tokens) + for token in token_filter(token_filter2(tokenize(source))): + type = token.kind + yield lark.lexer.Token(type, token, line=token.row, column=token.column) + + +@dataclass +class Token: + kind: str + text: str + row: int + column: int + comment1: str + comment2: str + + +def tokenize(source: str): + # Note that order is important below: + token_spec = [ + ("COMMENT1", r"\(\*.*?\*\)"), + ("COMMENT2", r"//.*?\n"), + ("OP2", r"(:=)|(==)|(<=)|(!=)|(>=)|(\.\.)"), + ("OP", r"[<>=:;,\.\(\)\+\-\*\/\[\]]"), + ("BIN_NUMBER", r"2#[0-1][0-1_]*"), + ("OCT_NUMBER", r"8#[0-7][0-7_]*"), + ("DEC_NUMBER", r"10#[0-9][0-9_]*"), + ("HEX_NUMBER", r"16#[0-9a-fA-F][0-9a-fA-F_]*"), + ("TIME", r"T#[0-9hHmMsS]+"), + ("ADDR", r"%[A-Za-z][A-Za-z0-9]*\*"), + ("REAL1", r"[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: 1E2 + ("REAL2", r"[0-9][0-9_]*\.[0-9][0-9_]*"), # example: 1.0 + ("REAL3", r"[0-9][0-9_]*\.[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: 1.0E2 + ("REAL4", r"\.[0-9][0-9_]*"), # example: .1 + ("REAL5", r"\.[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: .1E3 + ("NUMBER", r"[0-9][0-9_]*"), + ("ID", r"[A-Za-z_][A-Za-z_0-9]*"), + ("STRING", r"'[^']*'"), + ("SPACE", r"[ \t]+"), + ("ATTRIBUTE", r"\{.*?\}"), + ("NEWLINE", r"\n"), + ("OTHER", r"."), + ] + op_names = { + ":=": "COLON_EQUALS", + ":": "COLON", + ";": "SEMI", + ",": "COMMA", + ".": "DOT", + "..": "DOTDOT", + "+": "PLUS", + "-": "MINUS", + "*": "ASTERIX", + "/": "SLASH", + "{": "BRACE_OPEN", + "}": "BRACE_CLOSE", + "(": "PARENTHESIS_OPEN", + ")": "PARENTHESIS_CLOSE", + "[": "BRACKET_OPEN", + "]": "BRACKET_CLOSE", + } + + regex = "|".join(f"(?P<{name}>{pattern})" for name, pattern in token_spec) + row = 1 + column = 1 + + for mo in re.finditer(regex, source, re.MULTILINE | re.DOTALL): + kind: str = mo.lastgroup + value = mo.group() + if kind == "OP" or kind == "OP2": + kind = op_names[value] + elif kind == "ID": + if value in KEYWORDS: + kind = "KW_" + value + elif value in VAR_KEYWORDS: + kind = "KW_VAR" + elif 
value in ACCESS_KEYWORDS: + kind = "KW_ACCESS" + elif value in INTEGER_DATA_TYPES: + kind = "INTTYPE" + elif kind == "NEWLINE" or kind == "COMMENT2": + row += 1 + column = 1 + elif kind == "SPACE": + continue + elif kind.startswith("REAL"): + kind = "REAL" + elif kind.endswith("_NUMBER"): + kind = "NUMBER" + elif kind == "OTHER": + if value.isprintable(): + c = value + else: + c = str(value.encode(encoding="utf-8", errors="replace")) + raise ValueError(f"Unexpected character: {c} at ({row=},{column=})") + + yield Token(kind, value, row, column, "", "") + + yield Token("EOF", "EOF", row, column, "", "") + + +KEYWORDS = { + "ABSTRACT", + "ARRAY", + "AT", + "CONSTANT", + "END_STRUCT", + "END_TYPE", + "END_UNION", + "END_VAR", + "EXTENDS", + "FINAL", + "FUNCTION", + "FUNCTION_BLOCK", + "IMPLEMENTS", + "INTERFACE", + "METHOD", + "OF", + "PERSISTENT", + "POINTER", + "PROGRAM", + "PROPERTY", + "REFERENCE", + "STRING", + "STRUCT", + "TO", + "TYPE", + "UNION", + "WSTRING", +} + +ACCESS_KEYWORDS = { + "PRIVATE", + "PROTECTED", + "PUBLIC", + "INTERNAL", +} + +VAR_KEYWORDS = { + "VAR", + "VAR_GLOBAL", + "VAR_IN_OUT", + "VAR_INPUT", + "VAR_INST", + "VAR_OUTPUT", + "VAR_STAT", + "VAR_TEMP", +} + +INTEGER_DATA_TYPES = { + "BYTE", + "WORD", + "DWORD", + "LWORD", + "SINT", + "USINT", + "INT", + "UINT", + "DINT", + "UDINT", + "LINT", + "ULINT", +} + + +def token_filter(tokens): + """Remove comment tokens, and add comment as attribute to the next token.""" + comment1 = "" + for token in tokens: + if token.kind == "SPACE" or token.kind == "NEWLINE": + continue + elif token.kind == "COMMENT1": + comment1 = token.text + continue + elif token.kind == "COMMENT2": + continue + elif token.kind == "ATTRIBUTE": + pass + else: + if comment1: + token.comment1 = comment1 + yield token + comment1 = "" + + +def token_filter2(tokens): + previous_token = None + for token in tokens: + if token.kind == "COMMENT2": + if previous_token: + comment = token.text[2:].strip() + previous_token.comment1 = comment + + if previous_token: + yield previous_token + previous_token = token + + if previous_token: + yield previous_token diff --git a/src/plcdoc/parsing/nodes.py b/src/plcdoc/parsing/nodes.py new file mode 100644 index 0000000..2483eb7 --- /dev/null +++ b/src/plcdoc/parsing/nodes.py @@ -0,0 +1,211 @@ +""" Parsed AST nodes. 
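Editor's note: token_filter2() hangs a trailing "//" comment on the token before it, and token_filter() hangs a preceding "(* *)" block on the token after it, so the parser never sees comment tokens at all; the transformer later reads them back through .comment1. A short sketch of what the combined pipeline yields for one invented line of source:

from plcdoc.parsing.lexer import tokenize, token_filter, token_filter2

source = "(* Gain factor *)\nfGain : LREAL; // applied to every sample\n"

for token in token_filter(token_filter2(tokenize(source))):
    print(token.kind, repr(token.text), repr(token.comment1))
# ID 'fGain' '(* Gain factor *)'
# COLON ':' ''
# ID 'LREAL' ''
# SEMI ';' 'applied to every sample'
# EOF 'EOF' ''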
+ +""" + +from typing import Optional +from dataclasses import dataclass + + +@dataclass +class Function: + comment: str + kind: str + name: str + variable_lists: list["VariableList"] + + +# @dataclass +# class FunctionBlock: +# name: str + + +@dataclass +class Property: + comment: str + name: str + ty: "Type" + # init: Optional["Expression"] + + +@dataclass +class VariableList: + kind: str + flags: list[str] + variables: list["Variable"] + + +@dataclass +class Variable: + name: str + address: Optional[str] + ty: "Type" + init: Optional["Expression"] + comment: str + + +@dataclass +class TypeDef: + comment: str + name: str + ty: "Type" + + +class Type: + pass + + +@dataclass +class Struct(Type): + fields: list["StructField"] + + +@dataclass +class Union(Type): + fields: list["StructField"] + + +StructField = Variable + + +@dataclass +class Enum(Type): + options: list["EnumOption"] + base: Optional["Type"] + + +@dataclass +class EnumOption: + name: str + init: None + + +@dataclass +class LabeledArgument: + label: str + value: "Expression" + + +class Expression: + pass + + +@dataclass +class Binop(Expression): + lhs: "Expression" + op: str + rhs: "Expression" + + +@dataclass +class Unop(Expression): + op: str + rhs: "Expression" + + +@dataclass +class Call(Expression): + callee: "Expression" + arguments: list["Expression"] + + +@dataclass +class Number(Expression): + value: int + + +@dataclass +class FqNameRef(Expression): + names: str + + +def expression_to_text(expr, parens=False) -> str: + if isinstance(expr, Number): + return f"{expr.value}" + elif isinstance(expr, FqNameRef): + return ".".join(expr.names) + elif isinstance(expr, Unop): + rhs = expression_to_text(expr.rhs, parens=True) + if parens: + return f"({expr.op}{rhs})" + else: + return f"{expr.op}{rhs}" + elif isinstance(expr, Call): + callee = expression_to_text(expr.callee, parens=True) + args = ",".join(expression_to_text(a) for a in expr.arguments) + return f"{callee}({args})" + elif isinstance(expr, Binop): + lhs = expression_to_text(expr.lhs, parens=True) + rhs = expression_to_text(expr.rhs, parens=True) + if parens: + return f"({lhs} {expr.op} {rhs})" + else: + return f"{lhs} {expr.op} {rhs}" + else: + raise NotImplementedError(f"Not impl: {expr}") + + +def type_to_text(ty) -> str: + if isinstance(ty, StringType): + if ty.size: + size = expression_to_text(ty.size) + return f"STRING({size})" + else: + return "STRING" + elif isinstance(ty, IntegerType): + return ty.kind + elif isinstance(ty, FqNameRef): + return ".".join(ty.names) + elif isinstance(ty, ArrayType): + ",".join( + f"{expression_to_text(r.begin)}..{expression_to_text(r.end)}" if r else "*" + for r in ty.ranges + ) + d = 1 # TODO + e = type_to_text(ty.element_type) + return f"ARRAY [{d}] OF {e}" + elif isinstance(ty, PointerType): + e = type_to_text(ty.element_type) + return f"POINTER TO {e}" + elif isinstance(ty, ReferenceType): + e = type_to_text(ty.element_type) + return f"REFERENCE TO {e}" + else: + raise ValueError(f"Not impl: {type(ty)}") + + +@dataclass +class TypeRef: + name: str + + +@dataclass +class StringType(Type): + size: Optional["Expression"] + + +@dataclass +class IntegerType(Type): + kind: str + domain: Optional["Range"] + + +@dataclass +class ArrayType(Type): + ranges: list[Optional["Range"]] + element_type: "Type" + + +@dataclass +class PointerType(Type): + element_type: "Type" + + +@dataclass +class ReferenceType(Type): + element_type: "Type" + + +@dataclass +class Range: + begin: "Expression" + end: "Expression" diff --git 
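Editor's note: type_to_text() renders a parsed type node back into the text that ends up in signatures and ":var_input ...:" fields. A short usage sketch with hand-built nodes (note the ARRAY branch still carries a TODO and prints a placeholder dimension):

from plcdoc.parsing import nodes as ast

print(ast.type_to_text(ast.FqNameRef(["Tc2_Standard", "T_MaxString"])))
# Tc2_Standard.T_MaxString

print(ast.type_to_text(ast.PointerType(ast.FqNameRef(["ST_Sample"]))))
# POINTER TO ST_Sample

print(ast.type_to_text(ast.StringType(ast.Number(80))))
# STRING(80)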
a/src/plcdoc/parsing/parser.py b/src/plcdoc/parsing/parser.py new file mode 100644 index 0000000..3fe2888 --- /dev/null +++ b/src/plcdoc/parsing/parser.py @@ -0,0 +1,151 @@ +""" +Lark based PLC parser. +""" + +from .lexer import MyLexer +from .transform import MyTransformer +import logging +import lark + +logger = logging.getLogger(__name__) + + +def parse_new(text: str): + # print(text) + tree = parser.parse(text) + # print("PARSED:") + # pprint(tree, width=150) + return tree + + +grammar = """ +start: declaration+ EOF + +declaration: function | property | type_def | variable_list + +function: function_kind visibility ID (COLON type)? exim SEMI? variable_lists +function_kind: KW_PROGRAM + | KW_FUNCTION_BLOCK + | KW_FUNCTION + | KW_METHOD + | KW_INTERFACE +property: KW_PROPERTY visibility ID COLON type +exim: extends implements? +extends: (KW_EXTENDS fq_name_ref)? +implements: KW_IMPLEMENTS fq_name_ref +visibility: (KW_ABSTRACT | KW_ACCESS | KW_FINAL)? +variable_lists: variable_list* +variable_list: KW_VAR variable_list_flags variable* KW_END_VAR +variable_list_flags: (KW_CONSTANT | KW_PERSISTENT)* +variable: ids address COLON variable_type_init SEMI +variable_type_init: type initializer + | type PARENTHESIS_OPEN labeled_arguments PARENTHESIS_CLOSE + | type PARENTHESIS_OPEN expressions PARENTHESIS_CLOSE + | type PARENTHESIS_OPEN PARENTHESIS_CLOSE +address: (KW_AT ADDR)? + +type_def: KW_TYPE ID extends COLON (struct_decl | union_decl | enum_decl) KW_END_TYPE +struct_decl: KW_STRUCT variable* KW_END_STRUCT +union_decl: KW_UNION variable* KW_END_UNION +enum_decl: PARENTHESIS_OPEN enum_values PARENTHESIS_CLOSE integer_type? SEMI +enum_values: enum_value + | enum_values COMMA enum_value +enum_value: ID initializer + +initializer: (COLON_EQUALS expression)? +labeled_arguments: labeled_argument + | labeled_arguments COMMA labeled_argument +labeled_argument: ID COLON_EQUALS expression + +expressions: expression + | expressions COMMA expression + +expression: sum +sum: term + | expression (PLUS | MINUS) term +term: factor + | term (ASTERIX | SLASH) factor +factor: atom + | MINUS factor +atom: literal + | fq_name_ref + | struct_literal + | range_literal + | PARENTHESIS_OPEN expression PARENTHESIS_CLOSE + | atom PARENTHESIS_OPEN expressions PARENTHESIS_CLOSE + +ids: ID + | ids COMMA ID +fq_name_ref: ID + | fq_name_ref DOT ID + +struct_literal: PARENTHESIS_OPEN labeled_arguments PARENTHESIS_CLOSE +range_literal: PARENTHESIS_OPEN expression DOTDOT expression PARENTHESIS_CLOSE +literal: NUMBER + | REAL + | TIME + | STRING + +type: fq_name_ref + | integer_type + | string_type + | array_type + | pointer_type + | reference_type +integer_type: INTTYPE range_literal? 
+string_type: (KW_STRING | KW_WSTRING) + | (KW_STRING | KW_WSTRING) PARENTHESIS_OPEN expression PARENTHESIS_CLOSE + | (KW_STRING | KW_WSTRING) BRACKET_OPEN expression BRACKET_CLOSE +pointer_type: KW_POINTER KW_TO type +reference_type: KW_REFERENCE KW_TO type +array_type: KW_ARRAY BRACKET_OPEN subranges BRACKET_CLOSE KW_OF type +subranges: subrange + | subranges COMMA subrange +subrange: ASTERIX + | expression DOTDOT expression + +%declare KW_ABSTRACT +%declare KW_ARRAY +%declare KW_ACCESS +%declare KW_AT +%declare KW_CONSTANT +%declare KW_END_STRUCT +%declare KW_END_TYPE +%declare KW_END_UNION +%declare KW_END_VAR +%declare KW_EXTENDS +%declare KW_FINAL +%declare KW_FUNCTION +%declare KW_FUNCTION_BLOCK +%declare KW_IMPLEMENTS +%declare KW_INTERFACE +%declare KW_METHOD +%declare KW_OF +%declare KW_PERSISTENT +%declare KW_PROPERTY +%declare KW_PROGRAM +%declare KW_POINTER +%declare KW_STRUCT +%declare KW_REFERENCE +%declare KW_STRING +%declare KW_TO +%declare KW_TYPE +%declare KW_UNION +%declare KW_VAR +%declare KW_WSTRING + +%declare ID +%declare NUMBER REAL +%declare TIME ADDR +%declare STRING INTTYPE +%declare COLON_EQUALS +%declare COLON SEMI COMMA DOT DOTDOT +%declare PLUS MINUS ASTERIX SLASH +%declare BRACE_OPEN BRACE_CLOSE +%declare PARENTHESIS_OPEN PARENTHESIS_CLOSE +%declare BRACKET_OPEN BRACKET_CLOSE +%declare EOF + +""" + +parser = lark.Lark(grammar, parser="lalr", transformer=MyTransformer(), lexer=MyLexer) diff --git a/src/plcdoc/parsing/transform.py b/src/plcdoc/parsing/transform.py new file mode 100644 index 0000000..c91a87a --- /dev/null +++ b/src/plcdoc/parsing/transform.py @@ -0,0 +1,226 @@ +import lark +from . import nodes as ast + + +class MyTransformer(lark.Transformer): + def start(self, rhs): + # TODO: we can have multiple declarations + # For example VAR_GLOBAL .. VAR_GLOBAL CONSTANT + return rhs[0] + + def declaration(self, rhs): + return rhs[0] + + def visibility(self, rhs): + return 1 + + def function(self, rhs): + # print("FUNC", rhs) + comment, kind = rhs[0] + name = rhs[2].value.text + variable_lists = rhs[-1] + return ast.Function( + comment=comment, kind=kind, name=name, variable_lists=variable_lists + ) + + def function_kind(self, rhs): + comment = rhs[0].value.comment1 + kind = rhs[0].value.text.lower().replace("_", "") + return comment, kind + + def property(self, rhs): + comment = rhs[0].value.comment1 + name = rhs[2].value.text + ty = rhs[4] + return ast.Property(comment, name, ty) + + def type_def(self, rhs): + comment = rhs[0].value.comment1 + name = rhs[1].value.text + ty = rhs[-2] + return ast.TypeDef(comment=comment, name=name, ty=ty) + + def enum_decl(self, rhs): + options = rhs[1] + base = rhs[-2] if len(rhs) == 5 else None + return ast.Enum(options, base) + + def enum_values(self, rhs): + return comma(rhs) + + def enum_value(self, rhs): + name = rhs[0].value.text + init = rhs[1] + return ast.EnumOption(name, init) + + def struct_decl(self, rhs): + fields = rhs[1:-1] + return ast.Struct(fields) + + def union_decl(self, rhs): + fields = rhs[1:-1] + return ast.Union(fields) + + def variable_lists(self, rhs): + return rhs + + def variable_list(self, rhs): + kind = rhs[0].value.text + flags = rhs[1] + variables = rhs[2:-1] + return ast.VariableList(kind, flags, variables) + + def variable_list_flags(self, rhs): + return [r.value.text for r in rhs] + + def variable(self, rhs): + # print("VAR", rhs) + names = rhs[0] + name = names[0] + # TODO: support more than 1 name? 
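Editor's note: the wiring in parser.py follows Lark's documented custom-lexer pattern: because MyLexer produces the tokens itself, every terminal is only %declare'd (no patterns in the grammar), and the transformer passed to Lark runs during LALR reduction. A toy version of the same wiring, independent of the PLC grammar and assuming lark>=1 as added to pyproject.toml:

import lark
from lark.lexer import Lexer, Token

class ToyLexer(Lexer):
    """Whitespace-splitting lexer; every word becomes a WORD token."""

    def __init__(self, lexer_conf):
        pass

    def lex(self, data):
        for word in data.split():
            yield Token("WORD", word)

toy_grammar = """
start: WORD+
%declare WORD
"""

toy_parser = lark.Lark(toy_grammar, parser="lalr", lexer=ToyLexer)
print(toy_parser.parse("hello plc world").pretty())
# start
#   hello
#   plc
#   world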
+ address = rhs[1] + ty, init = rhs[3] + comment = rhs[-1].value.comment1 + return ast.Variable(name, address, ty, init, comment) + + def variable_type_init(self, rhs): + ty = rhs[0] + if len(rhs) == 2: + init = rhs[1] + elif len(rhs) == 3: + init = None + else: + init = rhs[2] + return (ty, init) + + def address(self, rhs): + if len(rhs) == 2: + return rhs[1].value.text + + def initializer(self, rhs): + if len(rhs) == 2: + return rhs[1] + + def labeled_arguments(self, rhs): + return comma(rhs) + + def labeled_argument(self, rhs): + label = rhs[0].value.text + value = rhs[2] + return ast.LabeledArgument(label, value) + + def expressions(self, rhs): + return comma(rhs) + + def expression(self, rhs): + return rhs[0] + + def sum(self, rhs): + return binop(rhs) + + def term(self, rhs): + return binop(rhs) + + def factor(self, rhs): + if len(rhs) == 1: + return rhs[0] + else: + op = rhs[0].value.text + return ast.Unop(op, rhs[1]) + + def atom(self, rhs): + if len(rhs) == 1: + return rhs[0] + elif len(rhs) == 3: + return rhs[1] + else: + assert len(rhs) == 4 + callee = rhs[0] + args = rhs[2] + return ast.Call(callee, args) + + def literal(self, rhs): + value = rhs[0].value.text + return ast.Number(value) + + def struct_literal(self, rhs): + return rhs[1] + + def range_literal(self, rhs): + begin = rhs[1] + end = rhs[3] + return ast.Range(begin, end) + + def ids(self, rhs): + if len(rhs) == 1: + name = rhs[0].value.text + names = [name] + else: + name = rhs[2].value.text + names = rhs[0] + [name] + return names + + def fq_name_ref(self, rhs): + if len(rhs) == 1: + name = rhs[0].value.text + names = [name] + else: + name = rhs[2].value.text + names = rhs[0].names + [name] + return ast.FqNameRef(names) + + def type(self, rhs): + # TODO: handle range indicator for integer types. + return rhs[0] + + def integer_type(self, rhs): + ty = rhs[0].value.text + domain = rhs[1] if len(rhs) > 1 else None + return ast.IntegerType(ty, domain) + + def string_type(self, rhs): + size = rhs[2] if len(rhs) == 4 else None + return ast.StringType(size) + + def pointer_type(self, rhs): + ty = rhs[-1] + return ast.PointerType(ty) + + def reference_type(self, rhs): + ty = rhs[-1] + return ast.ReferenceType(ty) + + def array_type(self, rhs): + ranges = rhs[2] + element_type = rhs[5] + return ast.ArrayType(ranges, element_type) + + def subranges(self, rhs): + return comma(rhs) + + def subrange(self, rhs): + if len(rhs) == 1: + return None + else: + begin = rhs[0] + end = rhs[2] + return ast.Range(begin, end) + + +def binop(rhs) -> ast.Binop: + if len(rhs) == 1: + return rhs[0] + else: + assert len(rhs) == 3 + lhs = rhs[0] + op = rhs[1].value.text + rhs = rhs[2] + return ast.Binop(lhs, op, rhs) + + +def comma(rhs): + """Handle a rule with one or more items, seperated by commas""" + if len(rhs) == 1: + return [rhs[0]] + else: + return rhs[0] + [rhs[2]]
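Editor's note: the grammar spells repetition as left recursion (for example, expressions: expression | expressions COMMA expression), which keeps the LALR tables small, and the comma() helper folds the nested reductions back into a flat list. A small sketch of how successive reductions flatten, using plain strings in place of the real transformed nodes and COMMA tokens:

from plcdoc.parsing.transform import comma

inner = comma(["IDLE"])                   # first reduction:  ['IDLE']
middle = comma([inner, ",", "RUNNING"])   # next reduction:   ['IDLE', 'RUNNING']
outer = comma([middle, ",", "ERROR"])     # outermost:        ['IDLE', 'RUNNING', 'ERROR']
print(outer)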