From cd3733b7767d6400a6b3c2ba226a0abb7f0a50c7 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Tue, 30 Apr 2024 22:19:55 +0200 Subject: [PATCH 1/4] First draft of lark based parsing --- src/plcdoc/parsing/__init__.py | 13 +++ src/plcdoc/parsing/lexer.py | 145 +++++++++++++++++++++++++++++ src/plcdoc/parsing/nodes.py | 112 ++++++++++++++++++++++ src/plcdoc/parsing/parser.py | 136 +++++++++++++++++++++++++++ src/plcdoc/parsing/transform.py | 159 ++++++++++++++++++++++++++++++++ 5 files changed, 565 insertions(+) create mode 100644 src/plcdoc/parsing/__init__.py create mode 100644 src/plcdoc/parsing/lexer.py create mode 100644 src/plcdoc/parsing/nodes.py create mode 100644 src/plcdoc/parsing/parser.py create mode 100644 src/plcdoc/parsing/transform.py diff --git a/src/plcdoc/parsing/__init__.py b/src/plcdoc/parsing/__init__.py new file mode 100644 index 0000000..99b7fa4 --- /dev/null +++ b/src/plcdoc/parsing/__init__.py @@ -0,0 +1,13 @@ +from .parser import parse_new + + +def parse_str(text: str): + return parse_new(text) + + +def parse_file(filename: str): + with open(filename, "r") as f: + return parse_new(f.read()) + + +__all__ = ["parse_str", "parse_file"] diff --git a/src/plcdoc/parsing/lexer.py b/src/plcdoc/parsing/lexer.py new file mode 100644 index 0000000..9c347a6 --- /dev/null +++ b/src/plcdoc/parsing/lexer.py @@ -0,0 +1,145 @@ +""" +PLC lexer. + +""" + +import re + +from dataclasses import dataclass +import lark.lexer + + +class MyLexer(lark.lexer.Lexer): + def __init__(self, lexer_conf): + pass + + def lex(self, source): + # print(code) + for token in token_filter(tokenize(source)): + type = token.kind + yield lark.lexer.Token(type, token) + + +@dataclass +class Token: + kind: str + text: str + row: int + column: int + comment1: str + + +def tokenize(source: str): + # Note that order is important below: + token_spec = [ + ("COMMENT1", r"\(\*.*?\*\)"), + ("COMMENT2", r"//.*?\n"), + ("OP2", r"(:=)|(==)|(<=)|(!=)|(>=)|(\.\.)"), + ("OP", r"[<>=:;,\.\(\)\+\-\*\/]"), + ("BIN_NUMBER", r"2#[0-1][0-1_]*"), + ("OCT_NUMBER", r"8#[0-7]+"), + ("HEX_NUMBER", r"16#[0-9a-fA-F][0-9a-fA-F_]*"), + ("TIME", r"T#[0-9a-fA-F][0-9a-fA-F_]*"), + ("REAL", r"[0-9]+\.[0-9]+"), + ("NUMBER", r"[0-9]+"), + ("ID", r"[A-Za-z][A-Za-z_0-9]*"), + ("STRING", r"'[^']*'"), + ("SPACE", r"[ \t]+"), + ("ATTRIBUTE", r"\{attribute.*?\}"), + ("NEWLINE", r"\n"), + ("OTHER", r"."), + ] + op_names = { + ":=": "COLON_EQUALS", + ":": "COLON", + ";": "SEMI", + ",": "COMMA", + ".": "DOT", + "..": "DOTDOT", + "+": "PLUS", + "-": "MINUS", + "*": "ASTERIX", + "/": "SLASH", + "{": "BRACE_OPEN", + "}": "BRACE_CLOSE", + "(": "PARENTHESIS_OPEN", + ")": "PARENTHESIS_CLOSE", + "[": "BRACKET_OPEN", + "]": "BRACKET_CLOSE", + } + + regex = "|".join(f"(?P<{name}>{pattern})" for name, pattern in token_spec) + row = 1 + column = 1 + + for mo in re.finditer(regex, source, re.MULTILINE | re.DOTALL): + kind: str = mo.lastgroup + value = mo.group() + if kind == "OP" or kind == "OP2": + kind = op_names[value] + elif kind == "ID": + if value in KEYWORDS: + kind = "KW_" + value + elif kind == "NEWLINE": + row += 1 + elif kind == "OTHER": + if value.isprintable(): + c = value + else: + c = str(value.encode(encoding="utf-8", errors="replace")) + raise ValueError(f"Unexpected character: {c}") + + yield Token(kind, value, row, column, "") + + yield Token("EOF", "EOF", row, column, "") + + +KEYWORDS = { + "ABSTRACT", + "ARRAY", + "END_STRUCT", + "END_TYPE", + "END_VAR", + "EXTENDS", + "FINAL", + "FUNCTION", + "FUNCTION_BLOCK", + "INTERFACE", + "INTERNAL", + "METHOD", + "OF", + "POINTER", + "PROGRAM", + "PROPERTY", + "PRIVATE", + "PROTECTED", + "PUBLIC", + "REFERENCE", + "STRING", + "STRUCT", + "TO", + "TYPE", + "VAR", + "VAR_GLOBAL", + "VAR_INPUT", + "VAR_OUTPUT", +} + + +def token_filter(tokens): + comment1 = "" + attr = "" + for token in tokens: + if token.kind == "SPACE" or token.kind == "NEWLINE": + continue + elif token.kind == "COMMENT1": + comment1 = token.text + continue + elif token.kind == "COMMENT2": + continue + elif token.kind == "ATTRIBUTE": + attr = token.text + else: + token.comment1 = comment1 + yield token + comment1 = "" diff --git a/src/plcdoc/parsing/nodes.py b/src/plcdoc/parsing/nodes.py new file mode 100644 index 0000000..04271ae --- /dev/null +++ b/src/plcdoc/parsing/nodes.py @@ -0,0 +1,112 @@ +""" Parsed AST nodes. + +""" + +from typing import Optional, Any, Union +from dataclasses import dataclass + + +@dataclass +class Function: + comment1: str + kind: str + name: str + variable_lists: list["VariableList"] + + +# @dataclass +# class FunctionBlock: +# name: str + +@dataclass +class Property: + name: str + ty: "Type" + # init: Optional["Expression"] + +@dataclass +class VariableList: + kind: str + variables: list["Variable"] + + +@dataclass +class Variable: + name: str + ty: "Type" + init: Optional["Expression"] + + +@dataclass +class TypeDef: + name: str + ty: "Type" + + +class Type: + pass + + +@dataclass +class Struct(Type): + fields: list["StructField"] + + +@dataclass +class Union(Type): + fields: list["StructField"] + + +StructField = Variable + + +@dataclass +class Enum(Type): + options: list["EnumOption"] + + +@dataclass +class EnumOption: + name: str + init: None + + +@dataclass +class LabeledArgument: + label: str + value: "Expression" + +class Expression: + pass + + +@dataclass +class Binop(Expression): + lhs: "Expression" + op: str + rhs: "Expression" + + +@dataclass +class Number(Expression): + value: int + + +@dataclass +class NameRef(Expression): + name: str + + +@dataclass +class TypeRef: + name: str + +@dataclass +class Array: + ranges: list["Range"] + element_type: "Type" + +@dataclass +class Range: + begin: "Expression" + end: "Expression" diff --git a/src/plcdoc/parsing/parser.py b/src/plcdoc/parsing/parser.py new file mode 100644 index 0000000..5069366 --- /dev/null +++ b/src/plcdoc/parsing/parser.py @@ -0,0 +1,136 @@ +""" +Lark based PLC parser. +""" + + +from .lexer import MyLexer +from .transform import MyTransformer +from pprint import pprint +import logging +import lark + +logger = logging.getLogger(__name__) + + +def parse_new(text: str): + print(text) + # for token in tokenize(text): + # print(token) + + tree = parser.parse(text) + # print("PARSED", tree) + print("PARSED:") + pprint(tree) + return tree + + +grammar = """ +start: declaration EOF + +declaration: function | property | type_def | variable_list + +function: (KW_PROGRAM | KW_FUNCTION_BLOCK | KW_FUNCTION | KW_METHOD | KW_INTERFACE) visibility ID (COLON type)? extends? SEMI? variable_lists +property: KW_PROPERTY visibility ID COLON type +extends: KW_EXTENDS ID +visibility: (KW_ABSTRACT | KW_PUBLIC | KW_PRIVATE | KW_PROTECTED | KW_INTERNAL | KW_FINAL)? +variable_lists: variable_list* +variable_list: (KW_VAR_INPUT | KW_VAR_OUTPUT | KW_VAR_GLOBAL | KW_VAR) variable* KW_END_VAR +variable: ID COLON type initializer? SEMI + +type_def: KW_TYPE ID extends? COLON (struct | union | enum) KW_END_TYPE +struct: KW_STRUCT variable* KW_END_STRUCT +union: KW_UNION variable* KW_END_UNION +enum: PARENTHESIS_OPEN enum_values PARENTHESIS_CLOSE SEMI +enum_values: enum_value + | enum_values COMMA enum_value +enum_value: ID initializer? + +initializer: COLON_EQUALS expression +labeled_arguments: labeled_argument + | labeled_arguments COMMA labeled_argument +labeled_argument: ID COLON_EQUALS expression + +expressions: expression + | expressions COMMA expression +expression: term + | expression (PLUS | MINUS) term +term: factor + | term (ASTERIX | SLASH) factor +factor: atom +atom: literal + | name_ref + | struct_literal + | range_literal + | PARENTHESIS_OPEN expression PARENTHESIS_CLOSE +name_ref: ID +struct_literal: PARENTHESIS_OPEN labeled_arguments PARENTHESIS_CLOSE +range_literal: PARENTHESIS_OPEN expression DOTDOT expression PARENTHESIS_CLOSE +literal: NUMBER + | REAL + | BIN_NUMBER + | OCT_NUMBER + | HEX_NUMBER + | STRING + +type: name_ref + | string_type + | array_type + | pointer_type + | reference_type +string_type: KW_STRING + | KW_STRING PARENTHESIS_OPEN expression PARENTHESIS_CLOSE + | KW_STRING BRACKET_OPEN expression BRACKET_CLOSE +array_type: KW_ARRAY BRACKET_OPEN subranges BRACKET_CLOSE KW_OF type +pointer_type: KW_POINTER KW_TO ID +reference_type: KW_REFERENCE KW_TO ID + +subranges: subrange + | subranges COMMA subrange +subrange: ASTERIX + | expression DOTDOT expression + +%declare KW_ABSTRACT +%declare KW_PROGRAM +%declare KW_FUNCTION +%declare KW_FUNCTION_BLOCK +%declare KW_INTERFACE +%declare KW_METHOD +%declare KW_PROPERTY +%declare KW_EXTENDS +%declare KW_FINAL +%declare KW_PUBLIC +%declare KW_PRIVATE +%declare KW_PROTECTED +%declare KW_INTERNAL +%declare KW_TYPE +%declare KW_END_TYPE +%declare KW_POINTER +%declare KW_STRUCT +%declare KW_END_STRUCT +%declare KW_UNION +%declare KW_END_UNION +%declare KW_STRING +%declare KW_ARRAY +%declare KW_OF +%declare KW_REFERENCE +%declare KW_TO +%declare KW_VAR_GLOBAL +%declare KW_VAR_INPUT +%declare KW_VAR_OUTPUT +%declare KW_VAR +%declare KW_END_VAR + +%declare ID +%declare NUMBER REAL BIN_NUMBER OCT_NUMBER HEX_NUMBER +%declare STRING +%declare COLON_EQUALS +%declare COLON SEMI COMMA DOT DOTDOT +%declare PLUS MINUS ASTERIX SLASH +%declare BRACE_OPEN BRACE_CLOSE +%declare PARENTHESIS_OPEN PARENTHESIS_CLOSE +%declare BRACKET_OPEN BRACKET_CLOSE +%declare EOF + +""" + +parser = lark.Lark(grammar, parser="lalr", transformer=MyTransformer(), lexer=MyLexer) diff --git a/src/plcdoc/parsing/transform.py b/src/plcdoc/parsing/transform.py new file mode 100644 index 0000000..78a487a --- /dev/null +++ b/src/plcdoc/parsing/transform.py @@ -0,0 +1,159 @@ +import lark +from . import nodes as ast + + +class MyTransformer(lark.Transformer): + def start(self, rhs): + return rhs[0] + + def declaration(self, rhs): + return rhs[0] + + def visibility(self, rhs): + return 1 + + def function(self, rhs): + comment1 = rhs[0].value.comment1 + print("FUNC", rhs) + kind = rhs[0].value.text.lower().replace("_", "") + index = 1 + if isinstance(rhs[index], int): + index += 1 + name = rhs[index].value.text + variable_lists = rhs[-1] + return ast.Function(comment1, kind, name, variable_lists) + + def property(self, rhs): + name = rhs[2].value.text + ty = rhs[4] + return ast.Property(name, ty) + + def type_def(self, rhs): + name = rhs[1].value.text + ty = rhs[-2] + return ast.TypeDef(name, ty) + + def enum(self, rhs): + options = rhs[1] + return ast.Enum(options) + + def enum_values(self, rhs): + return comma(rhs) + + def enum_value(self, rhs): + name = rhs[0].value.text + if len(rhs) > 1: + init = rhs[1] + else: + init = None + return ast.EnumOption(name, init) + + def struct(self, rhs): + fields = rhs[1:-1] + return ast.Struct(fields) + + def union(self, rhs): + fields = rhs[1:-1] + return ast.Union(fields) + + def variable_lists(self, rhs): + return rhs + + def variable_list(self, rhs): + kind = rhs[0].value.text + variables = rhs[1:-1] + return ast.VariableList(kind, variables) + + def variable(self, rhs): + name = rhs[0].value.text + ty = rhs[2] + if len(rhs) > 4: + init = rhs[3] + else: + init = None + return ast.Variable(name, ty, init) + + def initializer(self, rhs): + return rhs[1] + + def labeled_arguments(self, rhs): + return comma(rhs) + + def labeled_argument(self, rhs): + label = rhs[0].value.text + value = rhs[2] + return ast.LabeledArgument(label, value) + + def expressions(self, rhs): + return comma(rhs) + + def expression(self, rhs): + return binop(rhs) + + def term(self, rhs): + return binop(rhs) + + def atom(self, rhs): + if len(rhs) == 1: + return rhs[0] + else: + assert len(rhs) == 3 + return rhs[1] + + def literal(self, rhs): + value = rhs[0].value.text + return ast.Number(value) + + def struct_literal(self, rhs): + return rhs[1] + + def range_literal(self, rhs): + begin = rhs[1] + end = rhs[3] + return ast.Range(begin, end) + + def name_ref(self, rhs): + name = rhs[0].value.text + return ast.NameRef(name) + + def type(self, rhs): + return rhs[0] + + def string_type(self, rhs): + name = rhs[0].value.text + return ast.TypeRef(name) + + def array_type(self, rhs): + ranges = rhs[2] + element_type = rhs[5] + return ast.Array(ranges, element_type) + + def subranges(self, rhs): + return comma(rhs) + + def subrange(self, rhs): + if len(rhs) == 1: + return None + else: + begin = rhs[0] + end = rhs[2] + return ast.Range(begin, end) + + +def binop(rhs) -> ast.Binop: + if len(rhs) == 1: + return rhs[0] + else: + assert len(rhs) == 3 + lhs = rhs[0] + op = rhs[1].value.text + rhs = rhs[2] + return ast.Binop(lhs, op, rhs) + + +def comma(rhs): + """Handle a rule with one or more items, seperated by commas""" + if len(rhs) == 1: + return [rhs[0]] + else: + return rhs[0] + [rhs[2]] From a40b858391527103992ef4e685183a09166e36c9 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Fri, 3 May 2024 06:48:12 +0200 Subject: [PATCH 2/4] Add data class for variable declarations. --- src/plcdoc/documenters.py | 35 ++-- src/plcdoc/interpreter.py | 341 ++++++++++++++++++++++++-------------- 2 files changed, 232 insertions(+), 144 deletions(-) diff --git a/src/plcdoc/documenters.py b/src/plcdoc/documenters.py index 4f67467..ebb7824 100644 --- a/src/plcdoc/documenters.py +++ b/src/plcdoc/documenters.py @@ -13,7 +13,7 @@ ) from docutils.statemachine import StringList -from .interpreter import PlcInterpreter, PlcDeclaration, TextXMetaClass +from .interpreter import PlcInterpreter, PlcDeclaration, PlcVariableDeclaration logger = logging.getLogger(__name__) @@ -172,7 +172,7 @@ def format_name(self) -> str: def format_args(self, **kwargs: Any) -> Optional[str]: """Format arguments for signature, based on auto-data.""" - arg_strs = [f"{var.name}" for var in self.object.get_args()] + arg_strs = [f"{var.name}" for var in self.object.args] return "(" + ", ".join(arg_strs) + ")" @@ -205,10 +205,10 @@ def add_content(self, more_content: Optional[StringList]) -> None: # Also add VARs from meta-model args_block = [] - for var in self.object.get_args(): - line_param = f":{var.kind} {var.type.name} {var.name}:" - if var.comment and var.comment.text: - line_param += " " + var.comment.text + for var in self.object.args: + line_param = f":{var.kind} {var.ty} {var.name}:" + if var.comment: + line_param += " " + var.comment args_block.append(line_param) if args_block: @@ -230,7 +230,7 @@ def get_doc(self) -> Optional[List[List[str]]]: """Get docstring from the meta-model.""" # Read main docblock - comment_str = self.object.get_comment() + comment_str = self.object.comment if not comment_str: return [] @@ -393,10 +393,9 @@ def document_members(self, all_members: bool = False) -> None: member_documenters = [ PlcStructMemberDocumenter( self.directive, - member.name, - self.indent, - parent=self.object, member=member, + indent=self.indent, + parent=self.object, ) for member in self.object.members ] @@ -431,12 +430,11 @@ class PlcStructMemberDocumenter(PlcDataDocumenter): def __init__( self, directive, - name: str, + member: PlcVariableDeclaration, indent: str = "", parent: PlcDeclaration = None, - member: Optional[TextXMetaClass] = None, ) -> None: - super().__init__(directive, name, indent) + super().__init__(directive, member.name, indent) self.object = parent self.member = member @@ -444,23 +442,22 @@ def __init__( @classmethod def can_document_member( cls, - member: Union[PlcDeclaration, Any], + member: PlcVariableDeclaration, membername: str, isattr: bool, parent: Any, ) -> bool: - return type(member).__name__ == "Variable" - # Note: a TextX variable class is passed, not a complete PlcDeclaration + return isinstance(member, PlcVariableDeclaration) and member.kind == "member" def import_object(self, raiseerror: bool = False) -> bool: return self.member is not None # Expect member through constructor def get_doc(self) -> Optional[List[List[str]]]: # Read main docblock - if self.member is None or self.member.comment is None: + if self.member is None: return [] - comment_str = self.member.comment.text + comment_str = self.member.comment if not comment_str: return [] @@ -471,7 +468,7 @@ def format_signature(self, **kwargs: Any) -> str: return "" # Insert the known variable type - return f" : {self.member.type.name}" + return f" : {self.member.ty}" class PlcFolderDocumenter(PlcDataDocumenter): diff --git a/src/plcdoc/interpreter.py b/src/plcdoc/interpreter.py index 8bf8788..560f519 100644 --- a/src/plcdoc/interpreter.py +++ b/src/plcdoc/interpreter.py @@ -2,9 +2,12 @@ import os from typing import List, Dict, Optional, Any +from dataclasses import dataclass from glob import glob import logging +from .parsing import parse_new, nodes as ast import xml.etree.ElementTree as ET + from textx import metamodel_from_file, TextXSyntaxError PACKAGE_DIR = os.path.dirname(__file__) @@ -137,42 +140,45 @@ def _parse_file(self, filepath) -> bool: # Name is repeated inside the declaration, use it from there instead # name = item.attrib["Name"] - object_model = self._parse_declaration(item) - if object_model is None: + obj = self._parse_declaration(item, filepath) + if obj is None: continue - obj = PlcDeclaration(object_model, filepath) - # Methods are inside their own subtree with a `Declaration` - simply append # them to the object for node in item: if node.tag in ["Declaration", "Implementation"]: continue - method_model = self._parse_declaration(node) - if method_model is None: + method = self._parse_declaration(node, filepath) + if method is None: continue - method = PlcDeclaration(method_model, filepath) obj.add_child(method) self._add_model(obj) return True - def _parse_declaration(self, item) -> Optional["TextXMetaClass"]: + def _parse_declaration(self, item, filepath) -> Optional["TextXMetaClass"]: declaration_node = item.find("Declaration") if declaration_node is None: return None - try: - meta_model = self._meta_model.model_from_str(declaration_node.text) - return meta_model - except TextXSyntaxError as err: - name = item.attrib.get("Name", "") - logger.error( - "Error parsing node `%s` in file `%s`\n(%s)", - name, - self._active_file, - str(err), - ) + + use_textx = True + if use_textx: + try: + meta_model = self._meta_model.model_from_str(declaration_node.text) + return textx_model_to_declaration(meta_model, filepath) + except TextXSyntaxError as err: + name = item.attrib.get("Name", "") + logger.error( + "Error parsing node `%s` in file `%s`\n(%s)", + name, + self._active_file, + str(err), + ) + else: + node = parse_new(declaration_node.text) + return ast_node_to_plc_declaration(node, filepath) return None @@ -255,6 +261,174 @@ def get_objects_in_folder(self, folder: str) -> List["PlcDeclaration"]: raise KeyError(f"Found no models in the folder `{folder}`") +def ast_node_to_plc_declaration(node, file) -> "PlcDeclaration": + objtype = None + name = None + + if isinstance(node, ast.Function): + name = node.name + objtype = node.kind + elif isinstance(node, ast.TypeDef): + name = node.name + objtype = str(node.ty) + elif isinstance(node, ast.Property): + objtype = "property" + name = node.name + elif isinstance(node, ast.VariableList): + if file is None: + raise ValueError("Cannot parse GVL without file as no naming is present") + name = os.path.splitext(os.path.basename(file))[0] + objtype = "variable_list" + else: + raise ValueError(f"Unrecognized declaration in `{node}`") + + assert name is not None + return PlcDeclaration(objtype, name, file) + + +def textx_model_to_declaration( + meta_model: TextXMetaClass, file=None +) -> "PlcDeclaration": + objtype = None + name = None + members = [] + + if meta_model.functions: + model = meta_model.functions[0] + objtype = model.function_type.lower().replace("_", "") + + if meta_model.types: + model = meta_model.types[0] + type_str = type(model.type).__name__ + if "Enum" in type_str: + objtype = "enum" + elif "Struct" in type_str: + objtype = "struct" + if model.type: + print(model.type.members) + # aarg + members = [member_to_plc_declaration(m) for m in model.type.members] + elif "Union" in type_str: + objtype = "union" + if model.type: + members = [member_to_plc_declaration(m) for m in model.type.members] + else: + raise ValueError(f"Could not categorize type `{type_str}`") + + if meta_model.properties: + model = meta_model.properties[0] + objtype = "property" + + if meta_model.variable_lists: + if file is None: + raise ValueError("Cannot parse GVL without file as no naming is present") + name = os.path.splitext(os.path.basename(file))[0] + # # GVL are annoying because no naming is present in source - we need to + # # extract it from the file name + + model = meta_model.variable_lists[0] + objtype = "variable_list" + + if objtype is None: + raise ValueError(f"Unrecognized declaration in `{meta_model}`") + + if name is None: + name = model.name + comment = get_comment(model) + args = get_args(model) + + return PlcDeclaration( + objtype, name, comment=comment, args=args, members=members, file=file + ) + + +def member_to_plc_declaration(member) -> "PlcVariableDeclaration": + # print() + name = member.name + comment = member.comment.text if member.comment else "" + ty = member.type.name + return PlcVariableDeclaration( + kind="member", + name=name, + ty=ty, + comment=comment, + ) + + +def get_comment(_model) -> Optional[str]: + """Process main block comment from model into a neat list. + + A list is created for each 'region' of comments. The first comment block above + a declaration is the most common one. + """ + if hasattr(_model, "comment") and _model.comment is not None: + # Probably a comment line + big_block: str = _model.comment.text + elif hasattr(_model, "comments") and _model.comments: + # Probably a comment block (amongst multiple maybe) + block_comment = None + for comment in reversed(_model.comments): + # Find last block-comment + if type(comment).__name__ == "CommentBlock": + block_comment = comment + break + + if block_comment is None: + return None + + big_block: str = block_comment.text + else: + return None + + big_block = big_block.strip() # Get rid of whitespace + + # Remove comment indicators (cannot get rid of them by TextX) + if big_block.startswith("(*"): + big_block = big_block[2:] + if big_block.endswith("*)"): + big_block = big_block[:-2] + + # It looks like Windows line endings are already lost by now, but make sure + big_block = big_block.replace("\r\n", "\n") + + return big_block + + +def get_args(model) -> List: + """Return arguments. + + :param skip_internal: If true, only return in, out and inout variables + :retval: Empty list if there are none or arguments are applicable to this type. + """ + skip_internal = True + if not hasattr(model, "lists"): + return [] + + args = [] + + for var_list in model.lists: + var_kind = var_list.name.lower() + if skip_internal and var_kind not in [ + "var_input", + "var_output", + "var_input_output", + ]: + continue # Skip internal variables `VAR` + + for var in var_list.variables: + print(var, type(var)) + args.append(textx_to_var(var_kind, var)) + + return args + + +def textx_to_var(var_kind, var): + name = var.name + ty = var.type.name + comment = var.comment.text if var.comment else "" + return PlcVariableDeclaration(kind=var_kind, name=name, ty=ty, comment=comment) + + class PlcDeclaration: """Wrapper class for the result of the TextX parsing of a PLC source file. @@ -265,52 +439,19 @@ class PlcDeclaration: The `objtype` is as they appear in :class:`StructuredTextDomain`. """ - def __init__(self, meta_model: TextXMetaClass, file=None): + def __init__( + self, objtype: str, name: str, comment=None, args=(), members=(), file=None + ): """ :param meta_model: Parsing result :param file: Path to the file this model originates from """ - self._objtype = None - self._name = None - - if meta_model.functions: - self._model = meta_model.functions[0] - self._objtype = self._model.function_type.lower().replace("_", "") - - if meta_model.types: - self._model = meta_model.types[0] - type_str = type(self._model.type).__name__ - if "Enum" in type_str: - self._objtype = "enum" - elif "Struct" in type_str: - self._objtype = "struct" - elif "Union" in type_str: - self._objtype = "union" - else: - raise ValueError(f"Could not categorize type `{type_str}`") - - if meta_model.properties: - self._model = meta_model.properties[0] - self._objtype = "property" - - if meta_model.variable_lists: - if file is None: - raise ValueError( - "Cannot parse GVL without file as no naming is present" - ) - self._name, _ = os.path.splitext(os.path.basename(file)) - # GVL are annoying because no naming is present in source - we need to - # extract it from the file name - - self._model = meta_model.variable_lists[0] - self._objtype = "variable_list" - - if self._objtype is None: - raise ValueError(f"Unrecognized declaration in `{meta_model}`") - - if self._name is None: - self._name = self._model.name + self._objtype = objtype + self._name = name + self._comment = comment + self._args = args + self._members = members self._file: Optional[str] = file self._children: Dict[str, "PlcDeclaration"] = {} @@ -339,73 +480,23 @@ def children(self) -> Dict[str, "PlcDeclaration"]: @property def members(self) -> List[TextXMetaClass]: - if not self._model.type: - return [] - return self._model.type.members - - def get_comment(self) -> Optional[str]: - """Process main block comment from model into a neat list. - - A list is created for each 'region' of comments. The first comment block above - a declaration is the most common one. - """ - if hasattr(self._model, "comment") and self._model.comment is not None: - # Probably a comment line - big_block: str = self._model.comment.text - elif hasattr(self._model, "comments") and self._model.comments: - # Probably a comment block (amongst multiple maybe) - block_comment = None - for comment in reversed(self._model.comments): - # Find last block-comment - if type(comment).__name__ == "CommentBlock": - block_comment = comment - break - - if block_comment is None: - return None - - big_block: str = block_comment.text - else: - return None - - big_block = big_block.strip() # Get rid of whitespace - - # Remove comment indicators (cannot get rid of them by TextX) - if big_block.startswith("(*"): - big_block = big_block[2:] - if big_block.endswith("*)"): - big_block = big_block[:-2] - - # It looks like Windows line endings are already lost by now, but make sure - big_block = big_block.replace("\r\n", "\n") - - return big_block - - def get_args(self, skip_internal=True) -> List: - """Return arguments. + return self._members - :param skip_internal: If true, only return in, out and inout variables - :retval: Empty list if there are none or arguments are applicable to this type. - """ - if not hasattr(self._model, "lists"): - return [] - - args = [] - - for var_list in self._model.lists: - var_kind = var_list.name.lower() - if skip_internal and var_kind not in [ - "var_input", - "var_output", - "var_input_output", - ]: - continue # Skip internal variables `VAR` - - for var in var_list.variables: - var.kind = var_kind - args.append(var) + @property + def comment(self) -> Optional[str]: + return self._comment - return args + @property + def args(self) -> List: + return self._args def add_child(self, child: "PlcDeclaration"): self._children[child.name] = child + + +@dataclass +class PlcVariableDeclaration: + kind: str + name: str + ty: str + comment: str From 2f84d00ac24ec6cb193d28be0303ff69c97cff62 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Sun, 5 May 2024 13:37:22 +0200 Subject: [PATCH 3/4] Test lark parser on test projects. --- src/plcdoc/documenters.py | 2 +- src/plcdoc/interpreter.py | 49 ++++++++++- src/plcdoc/parsing/lexer.py | 112 ++++++++++++++++++++----- src/plcdoc/parsing/nodes.py | 111 +++++++++++++++++++++++-- src/plcdoc/parsing/parser.py | 129 ++++++++++++++++------------- src/plcdoc/parsing/transform.py | 141 +++++++++++++++++++++++--------- 6 files changed, 419 insertions(+), 125 deletions(-) diff --git a/src/plcdoc/documenters.py b/src/plcdoc/documenters.py index ebb7824..1c45f29 100644 --- a/src/plcdoc/documenters.py +++ b/src/plcdoc/documenters.py @@ -2,7 +2,7 @@ import os.path from abc import ABC -from typing import Tuple, List, Dict, Optional, Any, Union +from typing import Tuple, List, Dict, Optional, Any import re from sphinx.util import logging diff --git a/src/plcdoc/interpreter.py b/src/plcdoc/interpreter.py index 560f519..b78734a 100644 --- a/src/plcdoc/interpreter.py +++ b/src/plcdoc/interpreter.py @@ -10,6 +10,9 @@ from textx import metamodel_from_file, TextXSyntaxError +USE_TEXTX = False +# USE_TEXTX = True + PACKAGE_DIR = os.path.dirname(__file__) logger = logging.getLogger(__name__) @@ -163,8 +166,7 @@ def _parse_declaration(self, item, filepath) -> Optional["TextXMetaClass"]: if declaration_node is None: return None - use_textx = True - if use_textx: + if USE_TEXTX: try: meta_model = self._meta_model.model_from_str(declaration_node.text) return textx_model_to_declaration(meta_model, filepath) @@ -264,14 +266,39 @@ def get_objects_in_folder(self, folder: str) -> List["PlcDeclaration"]: def ast_node_to_plc_declaration(node, file) -> "PlcDeclaration": objtype = None name = None + args = [] + members = [] + comment = "" if isinstance(node, ast.Function): name = node.name objtype = node.kind + comment = process_comment(node.comment) + for vl in node.variable_lists: + for v in vl.variables: + arg = PlcVariableDeclaration( + kind=vl.kind.lower(), + name=v.name, + ty=ast.type_to_text(v.ty), + comment=v.comment, + ) + args.append(arg) + elif isinstance(node, ast.TypeDef): name = node.name - objtype = str(node.ty) + comment = process_comment(node.comment) + if isinstance(node.ty, ast.Struct): + objtype = "struct" + for f in node.ty.fields: + members.append(lark_field_to_var(f)) + elif isinstance(node.ty, ast.Union): + objtype = "union" + elif isinstance(node.ty, ast.Enum): + objtype = "enum" + else: + raise ValueError(f"typedef not supported for type: {node.ty}") elif isinstance(node, ast.Property): + comment = process_comment(node.comment) objtype = "property" name = node.name elif isinstance(node, ast.VariableList): @@ -283,7 +310,18 @@ def ast_node_to_plc_declaration(node, file) -> "PlcDeclaration": raise ValueError(f"Unrecognized declaration in `{node}`") assert name is not None - return PlcDeclaration(objtype, name, file) + + return PlcDeclaration( + objtype, name=name, comment=comment, args=args, members=members, file=file + ) + + +def lark_field_to_var(field: ast.StructField) -> "PlcVariableDeclaration": + comment = field.comment + ty = ast.type_to_text(field.ty) + return PlcVariableDeclaration( + kind="member", name=field.name, ty=ty, comment=comment + ) def textx_model_to_declaration( @@ -381,7 +419,10 @@ def get_comment(_model) -> Optional[str]: return None big_block = big_block.strip() # Get rid of whitespace + return process_comment(big_block) + +def process_comment(big_block): # Remove comment indicators (cannot get rid of them by TextX) if big_block.startswith("(*"): big_block = big_block[2:] diff --git a/src/plcdoc/parsing/lexer.py b/src/plcdoc/parsing/lexer.py index 9c347a6..d73a20e 100644 --- a/src/plcdoc/parsing/lexer.py +++ b/src/plcdoc/parsing/lexer.py @@ -15,9 +15,12 @@ def __init__(self, lexer_conf): def lex(self, source): # print(code) - for token in token_filter(tokenize(source)): + # tokens = iter() + # tokens = map(token_filter2, tokens) + # tokens = map(, tokens) + for token in token_filter(token_filter2(tokenize(source))): type = token.kind - yield lark.lexer.Token(type, token) + yield lark.lexer.Token(type, token, line=token.row, column=token.column) @dataclass @@ -27,6 +30,7 @@ class Token: row: int column: int comment1: str + comment2: str def tokenize(source: str): @@ -35,17 +39,23 @@ def tokenize(source: str): ("COMMENT1", r"\(\*.*?\*\)"), ("COMMENT2", r"//.*?\n"), ("OP2", r"(:=)|(==)|(<=)|(!=)|(>=)|(\.\.)"), - ("OP", r"[<>=:;,\.\(\)\+\-\*\/]"), + ("OP", r"[<>=:;,\.\(\)\+\-\*\/\[\]]"), ("BIN_NUMBER", r"2#[0-1][0-1_]*"), - ("OCT_NUMBER", r"8#[0-7]+"), + ("OCT_NUMBER", r"8#[0-7][0-7_]*"), + ("DEC_NUMBER", r"10#[0-9][0-9_]*"), ("HEX_NUMBER", r"16#[0-9a-fA-F][0-9a-fA-F_]*"), - ("TIME", r"T#[0-9a-fA-F][0-9a-fA-F_]*"), - ("REAL", r"[0-9]+\.[0-9]+"), - ("NUMBER", r"[0-9]+"), - ("ID", r"[A-Za-z][A-Za-z_0-9]*"), + ("TIME", r"T#[0-9hHmMsS]+"), + ("ADDR", r"%[A-Za-z][A-Za-z0-9]*\*"), + ("REAL1", r"[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: 1E2 + ("REAL2", r"[0-9][0-9_]*\.[0-9][0-9_]*"), # example: 1.0 + ("REAL3", r"[0-9][0-9_]*\.[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: 1.0E2 + ("REAL4", r"\.[0-9][0-9_]*"), # example: .1 + ("REAL5", r"\.[0-9][0-9_]*[eE][-+]?[0-9]+"), # example: .1E3 + ("NUMBER", r"[0-9][0-9_]*"), + ("ID", r"[A-Za-z_][A-Za-z_0-9]*"), ("STRING", r"'[^']*'"), ("SPACE", r"[ \t]+"), - ("ATTRIBUTE", r"\{attribute.*?\}"), + ("ATTRIBUTE", r"\{.*?\}"), ("NEWLINE", r"\n"), ("OTHER", r"."), ] @@ -80,55 +90,100 @@ def tokenize(source: str): elif kind == "ID": if value in KEYWORDS: kind = "KW_" + value - elif kind == "NEWLINE": + elif value in VAR_KEYWORDS: + kind = "KW_VAR" + elif value in ACCESS_KEYWORDS: + kind = "KW_ACCESS" + elif value in INTEGER_DATA_TYPES: + kind = "INTTYPE" + elif kind == "NEWLINE" or kind == "COMMENT2": row += 1 + column = 1 + elif kind == "SPACE": + continue + elif kind.startswith("REAL"): + kind = "REAL" + elif kind.endswith("_NUMBER"): + kind = "NUMBER" elif kind == "OTHER": if value.isprintable(): c = value else: c = str(value.encode(encoding="utf-8", errors="replace")) - raise ValueError(f"Unexpected character: {c}") + raise ValueError(f"Unexpected character: {c} at ({row=},{column=})") - yield Token(kind, value, row, column, "") + yield Token(kind, value, row, column, "", "") - yield Token("EOF", "EOF", row, column, "") + yield Token("EOF", "EOF", row, column, "", "") KEYWORDS = { "ABSTRACT", "ARRAY", + "AT", + "CONSTANT", "END_STRUCT", "END_TYPE", + "END_UNION", "END_VAR", "EXTENDS", "FINAL", "FUNCTION", "FUNCTION_BLOCK", + "IMPLEMENTS", "INTERFACE", - "INTERNAL", "METHOD", "OF", + "PERSISTENT", "POINTER", "PROGRAM", "PROPERTY", - "PRIVATE", - "PROTECTED", - "PUBLIC", "REFERENCE", "STRING", "STRUCT", "TO", "TYPE", + "UNION", + "WSTRING", +} + +ACCESS_KEYWORDS = { + "PRIVATE", + "PROTECTED", + "PUBLIC", + "INTERNAL", +} + +VAR_KEYWORDS = { "VAR", "VAR_GLOBAL", + "VAR_IN_OUT", "VAR_INPUT", + "VAR_INST", "VAR_OUTPUT", + "VAR_STAT", + "VAR_TEMP", +} + +INTEGER_DATA_TYPES = { + "BYTE", + "WORD", + "DWORD", + "LWORD", + "SINT", + "USINT", + "INT", + "UINT", + "DINT", + "UDINT", + "LINT", + "ULINT", } def token_filter(tokens): + """Remove comment tokens, and add comment as attribute to the next token.""" comment1 = "" - attr = "" for token in tokens: if token.kind == "SPACE" or token.kind == "NEWLINE": continue @@ -138,8 +193,25 @@ def token_filter(tokens): elif token.kind == "COMMENT2": continue elif token.kind == "ATTRIBUTE": - attr = token.text + pass else: - token.comment1 = comment1 + if comment1: + token.comment1 = comment1 yield token comment1 = "" + + +def token_filter2(tokens): + previous_token = None + for token in tokens: + if token.kind == "COMMENT2": + if previous_token: + comment = token.text[2:].strip() + previous_token.comment1 = comment + + if previous_token: + yield previous_token + previous_token = token + + if previous_token: + yield previous_token diff --git a/src/plcdoc/parsing/nodes.py b/src/plcdoc/parsing/nodes.py index 04271ae..2483eb7 100644 --- a/src/plcdoc/parsing/nodes.py +++ b/src/plcdoc/parsing/nodes.py @@ -2,13 +2,13 @@ """ -from typing import Optional, Any, Union +from typing import Optional from dataclasses import dataclass @dataclass class Function: - comment1: str + comment: str kind: str name: str variable_lists: list["VariableList"] @@ -18,27 +18,34 @@ class Function: # class FunctionBlock: # name: str + @dataclass class Property: + comment: str name: str ty: "Type" # init: Optional["Expression"] + @dataclass class VariableList: kind: str + flags: list[str] variables: list["Variable"] @dataclass class Variable: name: str + address: Optional[str] ty: "Type" init: Optional["Expression"] + comment: str @dataclass class TypeDef: + comment: str name: str ty: "Type" @@ -63,6 +70,7 @@ class Union(Type): @dataclass class Enum(Type): options: list["EnumOption"] + base: Optional["Type"] @dataclass @@ -76,6 +84,7 @@ class LabeledArgument: label: str value: "Expression" + class Expression: pass @@ -87,25 +96,115 @@ class Binop(Expression): rhs: "Expression" +@dataclass +class Unop(Expression): + op: str + rhs: "Expression" + + +@dataclass +class Call(Expression): + callee: "Expression" + arguments: list["Expression"] + + @dataclass class Number(Expression): value: int @dataclass -class NameRef(Expression): - name: str +class FqNameRef(Expression): + names: str + + +def expression_to_text(expr, parens=False) -> str: + if isinstance(expr, Number): + return f"{expr.value}" + elif isinstance(expr, FqNameRef): + return ".".join(expr.names) + elif isinstance(expr, Unop): + rhs = expression_to_text(expr.rhs, parens=True) + if parens: + return f"({expr.op}{rhs})" + else: + return f"{expr.op}{rhs}" + elif isinstance(expr, Call): + callee = expression_to_text(expr.callee, parens=True) + args = ",".join(expression_to_text(a) for a in expr.arguments) + return f"{callee}({args})" + elif isinstance(expr, Binop): + lhs = expression_to_text(expr.lhs, parens=True) + rhs = expression_to_text(expr.rhs, parens=True) + if parens: + return f"({lhs} {expr.op} {rhs})" + else: + return f"{lhs} {expr.op} {rhs}" + else: + raise NotImplementedError(f"Not impl: {expr}") + + +def type_to_text(ty) -> str: + if isinstance(ty, StringType): + if ty.size: + size = expression_to_text(ty.size) + return f"STRING({size})" + else: + return "STRING" + elif isinstance(ty, IntegerType): + return ty.kind + elif isinstance(ty, FqNameRef): + return ".".join(ty.names) + elif isinstance(ty, ArrayType): + ",".join( + f"{expression_to_text(r.begin)}..{expression_to_text(r.end)}" if r else "*" + for r in ty.ranges + ) + d = 1 # TODO + e = type_to_text(ty.element_type) + return f"ARRAY [{d}] OF {e}" + elif isinstance(ty, PointerType): + e = type_to_text(ty.element_type) + return f"POINTER TO {e}" + elif isinstance(ty, ReferenceType): + e = type_to_text(ty.element_type) + return f"REFERENCE TO {e}" + else: + raise ValueError(f"Not impl: {type(ty)}") @dataclass class TypeRef: name: str + @dataclass -class Array: - ranges: list["Range"] +class StringType(Type): + size: Optional["Expression"] + + +@dataclass +class IntegerType(Type): + kind: str + domain: Optional["Range"] + + +@dataclass +class ArrayType(Type): + ranges: list[Optional["Range"]] + element_type: "Type" + + +@dataclass +class PointerType(Type): element_type: "Type" + +@dataclass +class ReferenceType(Type): + element_type: "Type" + + @dataclass class Range: begin: "Expression" diff --git a/src/plcdoc/parsing/parser.py b/src/plcdoc/parsing/parser.py index 5069366..3fe2888 100644 --- a/src/plcdoc/parsing/parser.py +++ b/src/plcdoc/parsing/parser.py @@ -1,11 +1,9 @@ -""" +""" Lark based PLC parser. """ - from .lexer import MyLexer from .transform import MyTransformer -from pprint import pprint import logging import lark @@ -13,116 +11,133 @@ def parse_new(text: str): - print(text) - # for token in tokenize(text): - # print(token) - + # print(text) tree = parser.parse(text) - # print("PARSED", tree) - print("PARSED:") - pprint(tree) + # print("PARSED:") + # pprint(tree, width=150) return tree grammar = """ -start: declaration EOF +start: declaration+ EOF declaration: function | property | type_def | variable_list -function: (KW_PROGRAM | KW_FUNCTION_BLOCK | KW_FUNCTION | KW_METHOD | KW_INTERFACE) visibility ID (COLON type)? extends? SEMI? variable_lists +function: function_kind visibility ID (COLON type)? exim SEMI? variable_lists +function_kind: KW_PROGRAM + | KW_FUNCTION_BLOCK + | KW_FUNCTION + | KW_METHOD + | KW_INTERFACE property: KW_PROPERTY visibility ID COLON type -extends: KW_EXTENDS ID -visibility: (KW_ABSTRACT | KW_PUBLIC | KW_PRIVATE | KW_PROTECTED | KW_INTERNAL | KW_FINAL)? +exim: extends implements? +extends: (KW_EXTENDS fq_name_ref)? +implements: KW_IMPLEMENTS fq_name_ref +visibility: (KW_ABSTRACT | KW_ACCESS | KW_FINAL)? variable_lists: variable_list* -variable_list: (KW_VAR_INPUT | KW_VAR_OUTPUT | KW_VAR_GLOBAL | KW_VAR) variable* KW_END_VAR -variable: ID COLON type initializer? SEMI - -type_def: KW_TYPE ID extends? COLON (struct | union | enum) KW_END_TYPE -struct: KW_STRUCT variable* KW_END_STRUCT -union: KW_UNION variable* KW_END_UNION -enum: PARENTHESIS_OPEN enum_values PARENTHESIS_CLOSE SEMI +variable_list: KW_VAR variable_list_flags variable* KW_END_VAR +variable_list_flags: (KW_CONSTANT | KW_PERSISTENT)* +variable: ids address COLON variable_type_init SEMI +variable_type_init: type initializer + | type PARENTHESIS_OPEN labeled_arguments PARENTHESIS_CLOSE + | type PARENTHESIS_OPEN expressions PARENTHESIS_CLOSE + | type PARENTHESIS_OPEN PARENTHESIS_CLOSE +address: (KW_AT ADDR)? + +type_def: KW_TYPE ID extends COLON (struct_decl | union_decl | enum_decl) KW_END_TYPE +struct_decl: KW_STRUCT variable* KW_END_STRUCT +union_decl: KW_UNION variable* KW_END_UNION +enum_decl: PARENTHESIS_OPEN enum_values PARENTHESIS_CLOSE integer_type? SEMI enum_values: enum_value | enum_values COMMA enum_value -enum_value: ID initializer? +enum_value: ID initializer -initializer: COLON_EQUALS expression +initializer: (COLON_EQUALS expression)? labeled_arguments: labeled_argument | labeled_arguments COMMA labeled_argument labeled_argument: ID COLON_EQUALS expression expressions: expression | expressions COMMA expression -expression: term + +expression: sum +sum: term | expression (PLUS | MINUS) term term: factor | term (ASTERIX | SLASH) factor factor: atom + | MINUS factor atom: literal - | name_ref + | fq_name_ref | struct_literal | range_literal | PARENTHESIS_OPEN expression PARENTHESIS_CLOSE -name_ref: ID + | atom PARENTHESIS_OPEN expressions PARENTHESIS_CLOSE + +ids: ID + | ids COMMA ID +fq_name_ref: ID + | fq_name_ref DOT ID + struct_literal: PARENTHESIS_OPEN labeled_arguments PARENTHESIS_CLOSE range_literal: PARENTHESIS_OPEN expression DOTDOT expression PARENTHESIS_CLOSE literal: NUMBER | REAL - | BIN_NUMBER - | OCT_NUMBER - | HEX_NUMBER + | TIME | STRING -type: name_ref +type: fq_name_ref + | integer_type | string_type | array_type | pointer_type | reference_type -string_type: KW_STRING - | KW_STRING PARENTHESIS_OPEN expression PARENTHESIS_CLOSE - | KW_STRING BRACKET_OPEN expression BRACKET_CLOSE +integer_type: INTTYPE range_literal? +string_type: (KW_STRING | KW_WSTRING) + | (KW_STRING | KW_WSTRING) PARENTHESIS_OPEN expression PARENTHESIS_CLOSE + | (KW_STRING | KW_WSTRING) BRACKET_OPEN expression BRACKET_CLOSE +pointer_type: KW_POINTER KW_TO type +reference_type: KW_REFERENCE KW_TO type array_type: KW_ARRAY BRACKET_OPEN subranges BRACKET_CLOSE KW_OF type -pointer_type: KW_POINTER KW_TO ID -reference_type: KW_REFERENCE KW_TO ID - subranges: subrange | subranges COMMA subrange subrange: ASTERIX | expression DOTDOT expression %declare KW_ABSTRACT -%declare KW_PROGRAM +%declare KW_ARRAY +%declare KW_ACCESS +%declare KW_AT +%declare KW_CONSTANT +%declare KW_END_STRUCT +%declare KW_END_TYPE +%declare KW_END_UNION +%declare KW_END_VAR +%declare KW_EXTENDS +%declare KW_FINAL %declare KW_FUNCTION %declare KW_FUNCTION_BLOCK +%declare KW_IMPLEMENTS %declare KW_INTERFACE %declare KW_METHOD +%declare KW_OF +%declare KW_PERSISTENT %declare KW_PROPERTY -%declare KW_EXTENDS -%declare KW_FINAL -%declare KW_PUBLIC -%declare KW_PRIVATE -%declare KW_PROTECTED -%declare KW_INTERNAL -%declare KW_TYPE -%declare KW_END_TYPE +%declare KW_PROGRAM %declare KW_POINTER %declare KW_STRUCT -%declare KW_END_STRUCT -%declare KW_UNION -%declare KW_END_UNION -%declare KW_STRING -%declare KW_ARRAY -%declare KW_OF %declare KW_REFERENCE +%declare KW_STRING %declare KW_TO -%declare KW_VAR_GLOBAL -%declare KW_VAR_INPUT -%declare KW_VAR_OUTPUT +%declare KW_TYPE +%declare KW_UNION %declare KW_VAR -%declare KW_END_VAR +%declare KW_WSTRING %declare ID -%declare NUMBER REAL BIN_NUMBER OCT_NUMBER HEX_NUMBER -%declare STRING +%declare NUMBER REAL +%declare TIME ADDR +%declare STRING INTTYPE %declare COLON_EQUALS %declare COLON SEMI COMMA DOT DOTDOT %declare PLUS MINUS ASTERIX SLASH diff --git a/src/plcdoc/parsing/transform.py b/src/plcdoc/parsing/transform.py index 78a487a..c91a87a 100644 --- a/src/plcdoc/parsing/transform.py +++ b/src/plcdoc/parsing/transform.py @@ -4,6 +4,8 @@ class MyTransformer(lark.Transformer): def start(self, rhs): + # TODO: we can have multiple declarations + # For example VAR_GLOBAL .. VAR_GLOBAL CONSTANT return rhs[0] def declaration(self, rhs): @@ -13,46 +15,49 @@ def visibility(self, rhs): return 1 def function(self, rhs): - comment1 = rhs[0].value.comment1 - print("FUNC", rhs) - kind = rhs[0].value.text.lower().replace("_", "") - index = 1 - if isinstance(rhs[index], int): - index += 1 - name = rhs[index].value.text + # print("FUNC", rhs) + comment, kind = rhs[0] + name = rhs[2].value.text variable_lists = rhs[-1] - return ast.Function(comment1, kind, name, variable_lists) + return ast.Function( + comment=comment, kind=kind, name=name, variable_lists=variable_lists + ) + + def function_kind(self, rhs): + comment = rhs[0].value.comment1 + kind = rhs[0].value.text.lower().replace("_", "") + return comment, kind def property(self, rhs): + comment = rhs[0].value.comment1 name = rhs[2].value.text ty = rhs[4] - return ast.Property(name, ty) + return ast.Property(comment, name, ty) def type_def(self, rhs): + comment = rhs[0].value.comment1 name = rhs[1].value.text ty = rhs[-2] - return ast.TypeDef(name, ty) + return ast.TypeDef(comment=comment, name=name, ty=ty) - def enum(self, rhs): + def enum_decl(self, rhs): options = rhs[1] - return ast.Enum(options) + base = rhs[-2] if len(rhs) == 5 else None + return ast.Enum(options, base) def enum_values(self, rhs): return comma(rhs) def enum_value(self, rhs): name = rhs[0].value.text - if len(rhs) > 1: - init = rhs[1] - else: - init = None + init = rhs[1] return ast.EnumOption(name, init) - def struct(self, rhs): + def struct_decl(self, rhs): fields = rhs[1:-1] return ast.Struct(fields) - def union(self, rhs): + def union_decl(self, rhs): fields = rhs[1:-1] return ast.Union(fields) @@ -61,20 +66,40 @@ def variable_lists(self, rhs): def variable_list(self, rhs): kind = rhs[0].value.text - variables = rhs[1:-1] - return ast.VariableList(kind, variables) + flags = rhs[1] + variables = rhs[2:-1] + return ast.VariableList(kind, flags, variables) + + def variable_list_flags(self, rhs): + return [r.value.text for r in rhs] def variable(self, rhs): - name = rhs[0].value.text - ty = rhs[2] - if len(rhs) > 4: - init = rhs[3] - else: + # print("VAR", rhs) + names = rhs[0] + name = names[0] + # TODO: support more than 1 name? + address = rhs[1] + ty, init = rhs[3] + comment = rhs[-1].value.comment1 + return ast.Variable(name, address, ty, init, comment) + + def variable_type_init(self, rhs): + ty = rhs[0] + if len(rhs) == 2: + init = rhs[1] + elif len(rhs) == 3: init = None - return ast.Variable(name, ty, init) + else: + init = rhs[2] + return (ty, init) + + def address(self, rhs): + if len(rhs) == 2: + return rhs[1].value.text def initializer(self, rhs): - return rhs[1] + if len(rhs) == 2: + return rhs[1] def labeled_arguments(self, rhs): return comma(rhs) @@ -88,45 +113,87 @@ def expressions(self, rhs): return comma(rhs) def expression(self, rhs): + return rhs[0] + + def sum(self, rhs): return binop(rhs) def term(self, rhs): return binop(rhs) - def atom(self, rhs): + def factor(self, rhs): if len(rhs) == 1: return rhs[0] else: - assert len(rhs) == 3 + op = rhs[0].value.text + return ast.Unop(op, rhs[1]) + + def atom(self, rhs): + if len(rhs) == 1: + return rhs[0] + elif len(rhs) == 3: return rhs[1] + else: + assert len(rhs) == 4 + callee = rhs[0] + args = rhs[2] + return ast.Call(callee, args) def literal(self, rhs): value = rhs[0].value.text return ast.Number(value) - + def struct_literal(self, rhs): return rhs[1] - + def range_literal(self, rhs): begin = rhs[1] end = rhs[3] return ast.Range(begin, end) - def name_ref(self, rhs): - name = rhs[0].value.text - return ast.NameRef(name) + def ids(self, rhs): + if len(rhs) == 1: + name = rhs[0].value.text + names = [name] + else: + name = rhs[2].value.text + names = rhs[0] + [name] + return names + + def fq_name_ref(self, rhs): + if len(rhs) == 1: + name = rhs[0].value.text + names = [name] + else: + name = rhs[2].value.text + names = rhs[0].names + [name] + return ast.FqNameRef(names) def type(self, rhs): + # TODO: handle range indicator for integer types. return rhs[0] + def integer_type(self, rhs): + ty = rhs[0].value.text + domain = rhs[1] if len(rhs) > 1 else None + return ast.IntegerType(ty, domain) + def string_type(self, rhs): - name = rhs[0].value.text - return ast.TypeRef(name) + size = rhs[2] if len(rhs) == 4 else None + return ast.StringType(size) + + def pointer_type(self, rhs): + ty = rhs[-1] + return ast.PointerType(ty) + + def reference_type(self, rhs): + ty = rhs[-1] + return ast.ReferenceType(ty) def array_type(self, rhs): ranges = rhs[2] element_type = rhs[5] - return ast.Array(ranges, element_type) + return ast.ArrayType(ranges, element_type) def subranges(self, rhs): return comma(rhs) From fc61bef10a338cae1a9031b9ac3d32f4875c30d5 Mon Sep 17 00:00:00 2001 From: Windel Bouwman Date: Sun, 5 May 2024 13:40:06 +0200 Subject: [PATCH 4/4] Add lark as dependency. --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2eb304a..21d57e1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ + "lark>=1", "sphinx>=5.0,<7.0", "textX>=3.0", ]