Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion nemoguardrails/actions/llm/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ async def init(self):
self._init_flows_index(),
)

def _extract_user_message_example(self, flow: Flow) -> None:
def _extract_user_message_example(self, flow: Flow):
"""Heuristic to extract user message examples from a flow."""
elements = [item for item in flow.elements if item["_type"] != "doc_string_stmt" and item["_type"] != "stmt"]
if len(elements) != 2:
Expand Down
13 changes: 8 additions & 5 deletions nemoguardrails/colang/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,31 @@ def __init__(self, config: RailsConfig, verbose: bool = False):
self.verbose = verbose

# Register the actions with the dispatcher.
imported_paths = config.imported_paths if config.imported_paths else {}
self.action_dispatcher = ActionDispatcher(
config_path=config.config_path,
import_paths=list(config.imported_paths.values()),
import_paths=list(imported_paths.values()),
)

if hasattr(self, "_run_output_rails_in_parallel_streaming"):
self.action_dispatcher.register_action(
self._run_output_rails_in_parallel_streaming,
getattr(self, "_run_output_rails_in_parallel_streaming"),
name="run_output_rails_in_parallel_streaming",
)

if hasattr(self, "_run_flows_in_parallel"):
self.action_dispatcher.register_action(self._run_flows_in_parallel, name="run_flows_in_parallel")
self.action_dispatcher.register_action(
getattr(self, "_run_flows_in_parallel"), name="run_flows_in_parallel"
)

if hasattr(self, "_run_input_rails_in_parallel"):
self.action_dispatcher.register_action(
self._run_input_rails_in_parallel, name="run_input_rails_in_parallel"
getattr(self, "_run_input_rails_in_parallel"), name="run_input_rails_in_parallel"
)

if hasattr(self, "_run_output_rails_in_parallel"):
self.action_dispatcher.register_action(
self._run_output_rails_in_parallel, name="run_output_rails_in_parallel"
getattr(self, "_run_output_rails_in_parallel"), name="run_output_rails_in_parallel"
)

# The list of additional parameters that can be passed to the actions.
Expand Down
43 changes: 28 additions & 15 deletions nemoguardrails/colang/v1_0/lang/colang_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,7 @@ def _create_namespace(self, namespace):
# Now, append the new one
self.current_namespaces.append(namespace)
self.current_namespace = ".".join(self.current_namespaces)
assert self.next_line is not None, "next_line must not be None when creating namespace"
self.current_indentation = self.next_line["indentation"]
self.current_indentations.append(self.next_line["indentation"])

Expand All @@ -318,7 +319,7 @@ def _include_source_mappings(self):
# Include the source mapping information if required
if self.include_source_mapping:
if self.current_element and "_source_mapping" not in self.current_element:
self.current_element["_source_mapping"] = {
self.current_element["_source_mapping"] = { # type: ignore[assignment]
"filename": self.filename,
"line_number": self.current_line["number"],
"line_text": self.current_line["text"],
Expand Down Expand Up @@ -771,6 +772,10 @@ def _process_define(self):
# Finally, we parse the markdown content
self._extract_markdown()

def _insert_topic_flow_definition(self) -> None:
"""Insert a topic flow definition. Currently not implemented."""
raise NotImplementedError("Topic flow definitions are not yet implemented")

def _extract_indentation_levels(self):
"""Helper to extract the indentation levels higher than the current line."""
indentations = []
Expand Down Expand Up @@ -910,13 +915,14 @@ def _extract_params(self, param_lines: Optional[List] = None):
yaml_value = {"$0": yaml_value}

# self.current_element.update(yaml_value)
for k in yaml_value.keys():
# if the key tarts with $, we remove it
param_name = k
if param_name[0] == "$":
param_name = param_name[1:]
if self.current_element is not None and isinstance(self.current_element, dict):
for k in yaml_value.keys():
# if the key tarts with $, we remove it
param_name = k
if param_name[0] == "$":
param_name = param_name[1:]

self.current_element[param_name] = yaml_value[k]
self.current_element[param_name] = yaml_value[k] # type: ignore[assignment]

def _is_test_flow(self):
"""Returns true if the current flow is a test one.
Expand Down Expand Up @@ -956,6 +962,7 @@ def _is_sample_flow(self):

def _parse_when(self):
# TODO: deal with "when" after "else when"
assert self.next_line is not None, "Expected next line after 'when' statement."
assert self.next_line["indentation"] > self.current_line["indentation"], (
"Expected indented block after 'when' statement."
)
Expand Down Expand Up @@ -1280,6 +1287,7 @@ def _parse_bot(self):

# Finally, decide what to include in the element
if utterance_id is None:
assert utterance_text is not None, "utterance_text must not be None when utterance_id is None"
self.current_element["bot"] = {
"_type": "element",
"text": utterance_text[1:-1],
Expand All @@ -1298,11 +1306,12 @@ def _parse_bot(self):
# If there was a bot message with a snippet, we also add an expect
# TODO: can this be handled better?
try:
if "snippet" in self.current_element["bot"]:
bot_element = self.current_element["bot"]
if isinstance(bot_element, dict) and "snippet" in bot_element:
self.branches[-1]["elements"].append(
{
"expect": "snippet",
"snippet": self.current_element["bot"]["snippet"],
"snippet": bot_element["snippet"],
}
)
# noinspection PyBroadException
Expand Down Expand Up @@ -1374,7 +1383,7 @@ def _parse_do(self):
if return_vars:
return_vars = get_stripped_tokens(return_vars.split(","))
return_vars = [_var[1:] if _var[0] == "$" else _var for _var in return_vars]
self.current_element["_return_vars"] = return_vars
self.current_element["_return_vars"] = return_vars # type: ignore[assignment]

# Add to current branch
self.branches[-1]["elements"].append(self.current_element)
Expand Down Expand Up @@ -1477,6 +1486,7 @@ def _parse_if_branch(self, if_condition):
self.current_element = {"if": if_condition, "then": []}
self.branches[-1]["elements"].append(self.current_element)

assert self.next_line is not None, "next_line must not be None when parsing if branch"
self.ifs.append(
{
"element": self.current_element,
Expand Down Expand Up @@ -1519,6 +1529,7 @@ def _parse_while(self):
self.current_element = {"while": while_condition, "do": []}
self.branches[-1]["elements"].append(self.current_element)

assert self.next_line is not None, "next_line must not be None when parsing while"
# Add a new branch for the then part
self.branches.append(
{
Expand All @@ -1533,6 +1544,7 @@ def _parse_any(self):
}
self.branches[-1]["elements"].append(self.current_element)

assert self.next_line is not None, "next_line must not be None when parsing any"
# Add a new branch for the then part
self.branches.append(
{
Expand Down Expand Up @@ -1562,6 +1574,7 @@ def _parse_infer(self):
}
self.branches[-1]["elements"].append(self.current_element)

assert self.next_line is not None, "next_line must not be None when parsing infer"
# Add a new branch for the then part
self.branches.append(
{
Expand Down Expand Up @@ -1600,7 +1613,7 @@ def _parse_return(self):
}

if return_values:
self.current_element["_return_values"] = return_values
self.current_element["_return_values"] = return_values # type: ignore[assignment]

self.branches[-1]["elements"].append(self.current_element)

Expand Down Expand Up @@ -1697,15 +1710,15 @@ def parse(self):
exception = Exception(error)

# Decorate the exception with where the parsing failed
exception.filename = self.filename
exception.line = self.current_line["number"]
exception.error = str(ex)
exception.filename = self.filename # type: ignore[attr-defined]
exception.line = self.current_line["number"] # type: ignore[attr-defined]
exception.error = str(ex) # type: ignore[attr-defined]

raise exception

self.current_line_idx += 1

result = {"flows": self.flows}
result: dict = {"flows": self.flows}

if self.imports:
result["imports"] = self.imports
Expand Down
3 changes: 3 additions & 0 deletions nemoguardrails/colang/v1_0/lang/comd_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,9 @@ def parse_md_file(file_name, content=None):
continue

# Make sure we have the type of the symbol in the name of the symbol
if not sym or not isinstance(sym, str):
raise ValueError(f"sym must be a non-empty string, found {sym}")

sym = _get_typed_symbol_name(sym, symbol_type)

# For objects, we translate the "string" type to "kb:Object:prop|partial"
Expand Down
23 changes: 13 additions & 10 deletions nemoguardrails/colang/v1_0/lang/coyml_parser.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it seems that all control flow jump offsets are being converted from int to str:

# before:
if_element["_next_else"] = len(then_elements) + 1
{"_type": "jump", "_next": len(else_elements) + 1}
while_element["_next_on_break"] = n + 2
do_elements[j]["_next_on_break"] = n + 1 - j
do_elements[j]["_next_on_continue"] = -1 * j - 1

# after:
if_element["_next_else"] = str(len(then_elements) + 1)
{"_type": "jump", "_next": str(len(else_elements) + 1)}
while_element["_next_on_break"] = str(n + 2)
do_elements[j]["_next_on_break"] = str(n + 1 - j)
do_elements[j]["_next_on_continue"] = str(-1 * j - 1)
  • the runtime (sliding.py) calls int() on all these values
  • this suggests the code was defensive and could handle both int and str, however this is a significant data type change for control flow values
  • the change works because runtime converts back to int, but adds unnecessary string conversions

this seems like working around type annotations rather than fixing them, might be better to update type annotations to accept Union[int, str] or just int and if type annotations somewhere require str`, those annotations should be reviewed this adds runtime overhead (str conversion + int parsing) negligible but for no functional benefit

Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ def _dict_to_element(d):
elif d_type in ["break"]:
element = {"_type": "break"}
elif d_type in ["return"]:
element = {"_type": "jump", "_next": "-1", "_absolute": True}
element = {"_type": "jump", "_next": -1, "_absolute": True}

# Include the return values information
if "_return_values" in d:
Expand Down Expand Up @@ -420,14 +420,16 @@ def _extract_elements(items: List) -> List[dict]:
# for `if` flow elements, we have to go recursively
if element["_type"] == "if":
if_element = element
then_elements = _extract_elements(if_element["then"])
else_elements = _extract_elements(if_element["else"])
then_items = if_element["then"] if isinstance(if_element["then"], list) else []
else_items = if_element["else"] if isinstance(if_element["else"], list) else []
then_elements = _extract_elements(then_items)
else_elements = _extract_elements(else_items)

# Remove the raw info
del if_element["then"]
del if_element["else"]

if_element["_next_else"] = len(then_elements) + 1
if_element["_next_else"] = len(then_elements) + 1 # type: ignore[arg-type]

# Add the "if"
elements.append(if_element)
Expand All @@ -437,23 +439,24 @@ def _extract_elements(items: List) -> List[dict]:

# if we have "else" elements, we need to adjust also add a jump
if len(else_elements) > 0:
elements.append({"_type": "jump", "_next": len(else_elements) + 1})
if_element["_next_else"] += 1
elements.append({"_type": "jump", "_next": len(else_elements) + 1}) # type: ignore[dict-item]
if_element["_next_else"] += 1 # type: ignore[arg-type, operator]

# Add the "else" elements
elements.extend(else_elements)

# WHILE
elif element["_type"] == "while":
while_element = element
do_elements = _extract_elements(while_element["do"])
do_items = while_element["do"] if isinstance(while_element["do"], list) else []
do_elements = _extract_elements(do_items)
n = len(do_elements)

# Remove the raw info
del while_element["do"]

# On break we have to skip n elements and 1 jump, hence we go to n+2
while_element["_next_on_break"] = n + 2
while_element["_next_on_break"] = n + 2 # type: ignore[arg-type]

# We need to compute the jumps on break and on continue for each element
for j in range(n):
Expand Down Expand Up @@ -500,7 +503,7 @@ def _extract_elements(items: List) -> List[dict]:
branch_element = {
"_type": "branch",
# these are the relative positions to the current position
"branch_heads": [],
"branch_heads": [], # type: ignore
}
branch_element_pos = len(elements)
elements.append(branch_element)
Expand All @@ -520,7 +523,7 @@ def _extract_elements(items: List) -> List[dict]:
branch_element["_source_mapping"] = branch_path[0]["_source_mapping"]

# Create the jump element
jump_element = {"_type": "jump", "_next": 1}
jump_element = {"_type": "jump", "_next": 1} # type: ignore

# We compute how far we need to jump based on the remaining branches
j = branch_idx + 1
Expand Down
9 changes: 7 additions & 2 deletions nemoguardrails/colang/v1_0/lang/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,14 @@ def get_numbered_lines(content: str):
current_comment = None
multiline_string = False
current_string = None
multiline_indentation = 0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: multiline_indentation is initialized but never written to after this point. The value set on line 120 is never used because it's only set inside the multiline_string block that never reads this variable.

while i < len(raw_lines):
raw_line = raw_lines[i].strip()

# handle multiline string
if multiline_string:
if current_string is None:
current_string = ""
Comment on lines +95 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: This check is redundant. current_string is always None when entering the multiline_string block for the first time (line 119 sets it to raw_line), and it's set to None on line 110 when exiting. Is there a scenario where current_string could be set to None while multiline_string is True?

current_string += "\n" + raw_line
if raw_line.endswith('"'):
multiline_string = False
Expand Down Expand Up @@ -139,6 +142,8 @@ def get_numbered_lines(content: str):
continue

if multiline_comment:
if current_comment is None:
current_comment = ""
Comment on lines +145 to +146
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

style: This check is redundant. current_comment is set to raw_line[3:] on line 144 when entering the multiline_comment block, so it cannot be None here.

if raw_line.endswith('"""'):
current_comment += "\n" + raw_line[0:-3]
multiline_comment = False
Expand Down Expand Up @@ -403,7 +408,7 @@ def parse_package_name(text):
return package_name


def string_hash(s):
def string_hash(s: str) -> str:
"""A simple string hash with an equivalent implementation in javascript.

module.exports.string_hash = function(s){
Expand All @@ -421,7 +426,7 @@ def string_hash(s):
"""
_hash = 0
if len(s) == 0:
return 0
return "0"
for i in range(len(s)):
_char = ord(s[i])
_hash = ((_hash << 5) - _hash) + _char
Expand Down
Loading