From 3e67b1778a612f433f65abd4fc7adc215b603d14 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Sun, 14 Sep 2025 21:04:58 -0500 Subject: [PATCH 01/16] Cleaned llm/ type errors --- nemoguardrails/llm/filters.py | 8 +-- nemoguardrails/llm/helpers.py | 23 +++---- nemoguardrails/llm/models/initializer.py | 7 +- nemoguardrails/llm/params.py | 15 +++-- .../llm/providers/huggingface/pipeline.py | 39 ++++++++--- .../llm/providers/huggingface/streamers.py | 26 ++++++-- nemoguardrails/llm/providers/trtllm/client.py | 66 ++++++++++++------- nemoguardrails/llm/providers/trtllm/llm.py | 10 ++- nemoguardrails/llm/taskmanager.py | 21 ++++-- 9 files changed, 149 insertions(+), 66 deletions(-) diff --git a/nemoguardrails/llm/filters.py b/nemoguardrails/llm/filters.py index c195d5b01..a78110919 100644 --- a/nemoguardrails/llm/filters.py +++ b/nemoguardrails/llm/filters.py @@ -140,7 +140,7 @@ def to_messages(colang_history: str) -> List[dict]: # a message from the user, and the rest gets translated to messages from the assistant. lines = colang_history.split("\n") - bot_lines = [] + bot_lines: list[str] = [] for i, line in enumerate(lines): if line.startswith('user "'): # If we have bot lines in the buffer, we first add a bot message. @@ -181,8 +181,8 @@ def to_messages_v2(colang_history: str) -> List[dict]: # a message from the user, and the rest gets translated to messages from the assistant. lines = colang_history.split("\n") - user_lines = [] - bot_lines = [] + user_lines: list[str] = [] + bot_lines: list[str] = [] for line in lines: if line.startswith("user action:"): if len(bot_lines) > 0: @@ -275,7 +275,7 @@ def verbose_v1(colang_history: str) -> str: return "\n".join(lines) -def to_chat_messages(events: List[dict]) -> str: +def to_chat_messages(events: List[dict]) -> List[dict]: """Filter that turns an array of events into a sequence of user/assistant messages. Properly handles multimodal content by preserving the structure when the content diff --git a/nemoguardrails/llm/helpers.py b/nemoguardrails/llm/helpers.py index 04835d669..23d001e90 100644 --- a/nemoguardrails/llm/helpers.py +++ b/nemoguardrails/llm/helpers.py @@ -13,18 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import List, Optional, Type, Union +from typing import List, Optional, Type from langchain.callbacks.manager import ( AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun, ) -from langchain_core.language_models.llms import LLM, BaseLLM +from langchain_core.language_models.llms import LLM -def get_llm_instance_wrapper( - llm_instance: Union[LLM, BaseLLM], llm_type: str -) -> Type[LLM]: +def get_llm_instance_wrapper(llm_instance: LLM, llm_type: str) -> Type[LLM]: """Wraps an LLM instance in a class that can be registered with LLMRails. This is useful to create specific types of LLMs using a generic LLM provider @@ -47,7 +45,7 @@ def model_kwargs(self): These are needed to allow changes to the arguments of the LLM calls. 
""" if hasattr(llm_instance, "model_kwargs"): - return llm_instance.model_kwargs + return getattr(llm_instance, "model_kwargs") return {} @property @@ -66,26 +64,29 @@ def _modify_instance_kwargs(self): """ if hasattr(llm_instance, "model_kwargs"): - if isinstance(llm_instance.model_kwargs, dict): - llm_instance.model_kwargs["temperature"] = self.temperature - llm_instance.model_kwargs["streaming"] = self.streaming + model_kwargs = getattr(llm_instance, "model_kwargs") + if isinstance(model_kwargs, dict): + model_kwargs["temperature"] = self.temperature + model_kwargs["streaming"] = self.streaming def _call( self, prompt: str, stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs, ) -> str: self._modify_instance_kwargs() - return llm_instance._call(prompt, stop, run_manager) + return llm_instance._call(prompt, stop, run_manager, **kwargs) async def _acall( self, prompt: str, stop: Optional[List[str]] = None, run_manager: Optional[AsyncCallbackManagerForLLMRun] = None, + **kwargs, ) -> str: self._modify_instance_kwargs() - return await llm_instance._acall(prompt, stop, run_manager) + return await llm_instance._acall(prompt, stop, run_manager, **kwargs) return WrapperLLM diff --git a/nemoguardrails/llm/models/initializer.py b/nemoguardrails/llm/models/initializer.py index 09071920c..bd97d03fd 100644 --- a/nemoguardrails/llm/models/initializer.py +++ b/nemoguardrails/llm/models/initializer.py @@ -20,12 +20,15 @@ from langchain_core.language_models import BaseChatModel from langchain_core.language_models.llms import BaseLLM -from .langchain_initializer import ModelInitializationError, init_langchain_model +from nemoguardrails.llm.models.langchain_initializer import ( + ModelInitializationError, + init_langchain_model, +) # later we can easily conver it to a class def init_llm_model( - model_name: Optional[str], + model_name: str, provider_name: str, mode: Literal["chat", "text"], kwargs: Dict[str, Any], diff --git a/nemoguardrails/llm/params.py b/nemoguardrails/llm/params.py index 7a4cf13f6..3cdf948c0 100644 --- a/nemoguardrails/llm/params.py +++ b/nemoguardrails/llm/params.py @@ -36,7 +36,7 @@ import logging import warnings -from typing import Dict, Type +from typing import Any, Dict, Type from langchain.base_language import BaseLanguageModel @@ -61,18 +61,18 @@ def __init__(self, llm: BaseLanguageModel, **kwargs): warnings.warn(_DEPRECATION_MESSAGE, DeprecationWarning, stacklevel=2) self.llm = llm self.altered_params = kwargs - self.original_params = {} + self.original_params: dict[str, Any] = {} def __enter__(self): # Here we can access and modify the global language model parameters. - self.original_params = {} for param, value in self.altered_params.items(): if hasattr(self.llm, param): self.original_params[param] = getattr(self.llm, param) setattr(self.llm, param, value) elif hasattr(self.llm, "model_kwargs"): - if param not in self.llm.model_kwargs: + model_kwargs = getattr(self.llm, "model_kwargs", {}) + if param not in model_kwargs: log.warning( "Parameter %s does not exist for %s. 
Passing to model_kwargs", param, @@ -81,9 +81,10 @@ def __enter__(self): self.original_params[param] = None else: - self.original_params[param] = self.llm.model_kwargs[param] + self.original_params[param] = model_kwargs[param] - self.llm.model_kwargs[param] = value + model_kwargs[param] = value + setattr(self.llm, "model_kwargs", model_kwargs) else: log.warning( @@ -92,7 +93,7 @@ def __enter__(self): self.llm.__class__.__name__, ) - def __exit__(self, type, value, traceback): + def __exit__(self, exc_type, value, traceback): # Restore original parameters when exiting the context for param, value in self.original_params.items(): if hasattr(self.llm, param): diff --git a/nemoguardrails/llm/providers/huggingface/pipeline.py b/nemoguardrails/llm/providers/huggingface/pipeline.py index 8745a109d..918837693 100644 --- a/nemoguardrails/llm/providers/huggingface/pipeline.py +++ b/nemoguardrails/llm/providers/huggingface/pipeline.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import asyncio from typing import Any, List, Optional from langchain.callbacks.manager import ( @@ -20,7 +21,25 @@ CallbackManagerForLLMRun, ) from langchain.schema.output import GenerationChunk -from langchain_community.llms import HuggingFacePipeline + +# Import HuggingFacePipeline with fallbacks for different LangChain versions +HuggingFacePipeline = None # type: ignore[assignment] + +try: + from langchain_community.llms import ( + HuggingFacePipeline, # type: ignore[attr-defined,no-redef] + ) +except ImportError: + # Fallback for older versions of langchain + try: + from langchain.llms import ( + HuggingFacePipeline, # type: ignore[attr-defined,no-redef] + ) + except ImportError: + # Create a dummy class if HuggingFacePipeline is not available + class HuggingFacePipeline: # type: ignore[misc,no-redef] + def __init__(self, *args, **kwargs): + raise ImportError("HuggingFacePipeline is not available") class HuggingFacePipelineCompatible(HuggingFacePipeline): @@ -47,12 +66,13 @@ def _call( ) # Streaming for NeMo Guardrails is not supported in sync calls. - if self.model_kwargs and self.model_kwargs.get("streaming"): - raise Exception( + model_kwargs = getattr(self, "model_kwargs", {}) + if model_kwargs and model_kwargs.get("streaming"): + raise NotImplementedError( "Streaming mode not supported for HuggingFacePipeline in NeMo Guardrails!" ) - llm_result = self._generate( + llm_result = getattr(self, "_generate")( [prompt], stop=stop, run_manager=run_manager, @@ -78,11 +98,12 @@ async def _acall( ) # Handle streaming, if the flag is set - if self.model_kwargs and self.model_kwargs.get("streaming"): + model_kwargs = getattr(self, "model_kwargs", {}) + if model_kwargs and model_kwargs.get("streaming"): # Retrieve the streamer object, needs to be set in model_kwargs - streamer = self.model_kwargs.get("streamer") + streamer = model_kwargs.get("streamer") if not streamer: - raise Exception( + raise ValueError( "Cannot stream, please add HuggingFace streamer object to model_kwargs!" ) @@ -99,7 +120,7 @@ async def _acall( run_manager=run_manager, **kwargs, ) - loop.create_task(self._agenerate(**generation_kwargs)) + loop.create_task(getattr(self, "_agenerate")(**generation_kwargs)) # And start waiting for the chunks to come in. 
completion = "" @@ -111,7 +132,7 @@ async def _acall( return completion - llm_result = await self._agenerate( + llm_result = await getattr(self, "_agenerate")( [prompt], stop=stop, run_manager=run_manager, diff --git a/nemoguardrails/llm/providers/huggingface/streamers.py b/nemoguardrails/llm/providers/huggingface/streamers.py index c163b8217..7ed5a3beb 100644 --- a/nemoguardrails/llm/providers/huggingface/streamers.py +++ b/nemoguardrails/llm/providers/huggingface/streamers.py @@ -14,11 +14,27 @@ # limitations under the License. import asyncio +from typing import TYPE_CHECKING, Optional -from transformers.generation.streamers import TextStreamer +TRANSFORMERS_AVAILABLE = True +try: + from transformers.generation.streamers import ( # type: ignore[import-untyped] + TextStreamer, + ) +except ImportError: + # Fallback if transformers is not available + TRANSFORMERS_AVAILABLE = False + class TextStreamer: # type: ignore[no-redef] + def __init__(self, *args, **kwargs): + pass -class AsyncTextIteratorStreamer(TextStreamer): + +if TYPE_CHECKING: + from transformers import AutoTokenizer # type: ignore[import-untyped] + + +class AsyncTextIteratorStreamer(TextStreamer): # type: ignore[misc] """ Simple async implementation for HuggingFace Transformers streamers. @@ -30,12 +46,14 @@ def __init__( self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, **decode_kwargs ): super().__init__(tokenizer, skip_prompt, **decode_kwargs) - self.text_queue = asyncio.Queue() + self.text_queue: asyncio.Queue[str] = asyncio.Queue() self.stop_signal = None - self.loop = None + self.loop: Optional[asyncio.AbstractEventLoop] = None def on_finalized_text(self, text: str, stream_end: bool = False): """Put the new text in the queue. If the stream is ending, also put a stop signal in the queue.""" + if self.loop is None: + return if len(text) > 0: asyncio.run_coroutine_threadsafe(self.text_queue.put(text), self.loop) diff --git a/nemoguardrails/llm/providers/trtllm/client.py b/nemoguardrails/llm/providers/trtllm/client.py index 46fd2ff3f..9e74d72c1 100644 --- a/nemoguardrails/llm/providers/trtllm/client.py +++ b/nemoguardrails/llm/providers/trtllm/client.py @@ -19,7 +19,25 @@ import queue import time from functools import partial -from typing import Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +# Try to import tritonclient dependencies, with fallbacks for type checking +try: + import tritonclient.grpc as grpcclient + from tritonclient.grpc.service_pb2 import ( + ModelInferResponse, # type: ignore[attr-defined] + ) + + TRITONCLIENT_AVAILABLE = True +except ImportError: + # Create dummy types when tritonclient is not available + grpcclient = Any # type: ignore + ModelInferResponse = Any # type: ignore + TRITONCLIENT_AVAILABLE = False + +if TYPE_CHECKING and not TRITONCLIENT_AVAILABLE: + import tritonclient.grpc as grpcclient # type: ignore + from tritonclient.grpc.service_pb2 import ModelInferResponse # type: ignore STOP_WORDS = [""] BAD_WORDS = [""] @@ -31,11 +49,11 @@ class TritonClient: def __init__(self, server_url: str) -> None: """Initialize the client.""" - # pylint: disable-next=import-outside-toplevel - import tritonclient.grpc as grpcclient + if not TRITONCLIENT_AVAILABLE: + raise ImportError("tritonclient is required for TensorRT-LLM support") self.server_url = server_url - self.client = grpcclient.InferenceServerClient(server_url) + self.client = grpcclient.InferenceServerClient(server_url) # type: ignore def load_model(self, model_name: str, timeout: 
int = 1000) -> None: """Load a model into the server.""" @@ -54,29 +72,33 @@ def load_model(self, model_name: str, timeout: int = 1000) -> None: def get_model_list(self) -> List[str]: """Get a list of models loaded in the triton server.""" res = self.client.get_model_repository_index(as_json=True) + if res is None or "models" not in res: + return [] return [model["name"] for model in res["models"]] def get_model_concurrency(self, model_name: str, timeout: int = 1000) -> int: """Get the modle concurrency.""" self.load_model(model_name, timeout) - instances = self.client.get_model_config(model_name, as_json=True)["config"][ - "instance_group" - ] + config_result = self.client.get_model_config(model_name, as_json=True) + if config_result is None or "config" not in config_result: + return 0 + instances = config_result["config"].get("instance_group", []) return sum(instance["count"] * len(instance["gpus"]) for instance in instances) @staticmethod def process_result(result: Dict[str, str]) -> Dict[str, str]: """Post-process the result from the server.""" - import google.protobuf.json_format # pylint: disable=import-outside-toplevel - import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel + if not TRITONCLIENT_AVAILABLE: + raise ImportError("tritonclient is required for TensorRT-LLM support") - # pylint: disable-next=import-outside-toplevel - from tritonclient.grpc.service_pb2 import ModelInferResponse + import google.protobuf.json_format - message = ModelInferResponse() + message = ModelInferResponse() # type: ignore[misc] google.protobuf.json_format.Parse(json.dumps(result), message) - infer_result = grpcclient.InferResult(message) + infer_result = grpcclient.InferResult(message) # type: ignore np_res = infer_result.as_numpy("OUTPUT_0") + if np_res is None: + return {"OUTPUT_0": ""} if np_res.ndim == 2: generated_text = np_res[0, 0].decode() else: @@ -140,21 +162,21 @@ def close_streaming(self) -> None: self.client.stop_stream() @staticmethod - def generate_outputs() -> List["grpcclient.InferRequestedOutput"]: + def generate_outputs() -> List[Any]: """Generate the expected output structure.""" - import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel - - return [grpcclient.InferRequestedOutput("OUTPUT_0")] + if not TRITONCLIENT_AVAILABLE: + raise ImportError("tritonclient is required for TensorRT-LLM support") + return [grpcclient.InferRequestedOutput("OUTPUT_0")] # type: ignore @staticmethod - def prepare_tensor(name: str, input_data: Any) -> "grpcclient.InferInput": + def prepare_tensor(name: str, input_data: Any) -> Any: """Prepare an input data structure.""" - import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel + if not TRITONCLIENT_AVAILABLE: + raise ImportError("tritonclient is required for TensorRT-LLM support") - # pylint: disable-next=import-outside-toplevel from tritonclient.utils import np_to_triton_dtype - t = grpcclient.InferInput( + t = grpcclient.InferInput( # type: ignore name, input_data.shape, np_to_triton_dtype(input_data.dtype) ) t.set_data_from_numpy(input_data) @@ -170,7 +192,7 @@ def generate_inputs( # pylint: disable=too-many-arguments,too-many-locals beam_width: int = 1, repetition_penalty: float = 1, length_penalty: float = 1.0, - ) -> List["grpcclient.InferInput"]: + ) -> List[Any]: """Create the input for the triton inference server.""" import numpy as np # pylint: disable=import-outside-toplevel diff --git a/nemoguardrails/llm/providers/trtllm/llm.py 
b/nemoguardrails/llm/providers/trtllm/llm.py index cec6a5fe1..ea332d7cc 100644 --- a/nemoguardrails/llm/providers/trtllm/llm.py +++ b/nemoguardrails/llm/providers/trtllm/llm.py @@ -18,7 +18,13 @@ import queue from functools import partial -from typing import Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +if TYPE_CHECKING: + try: + from tritonclient.utils import InferenceServerException + except ImportError: + InferenceServerException = Exception from langchain.callbacks.manager import CallbackManagerForLLMRun from langchain_core.language_models.llms import BaseLLM @@ -107,7 +113,7 @@ def _llm_type(self) -> str: def _call( self, prompt: str, - stop: Optional[List[str]] = None, + stop: Optional[List[str]] = None, # pylint: disable=unused-argument run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: diff --git a/nemoguardrails/llm/taskmanager.py b/nemoguardrails/llm/taskmanager.py index 49e39cc24..1cf5850bb 100644 --- a/nemoguardrails/llm/taskmanager.py +++ b/nemoguardrails/llm/taskmanager.py @@ -95,6 +95,8 @@ def __init__(self, config: RailsConfig): def _get_general_instructions(self): """Helper to extract the general instructions.""" text = "" + if self.config.instructions is None: + return text for instruction in self.config.instructions: if instruction.type == "general": text = instruction.content @@ -266,7 +268,9 @@ def render_task_prompt( task_prompt = self._render_string( prompt.content, context=context, events=events ) - while len(task_prompt) > prompt.max_length: + while ( + prompt.max_length is not None and len(task_prompt) > prompt.max_length + ): if not events: raise Exception( f"Prompt exceeds max length of {prompt.max_length} characters even without history" @@ -288,20 +292,27 @@ def render_task_prompt( return task_prompt else: + if prompt.messages is None: + return [] task_messages = self._render_messages( prompt.messages, context=context, events=events ) task_prompt_length = self._get_messages_text_length(task_messages) - while task_prompt_length > prompt.max_length: + while ( + prompt.max_length is not None and task_prompt_length > prompt.max_length + ): if not events: raise Exception( f"Prompt exceeds max length of {prompt.max_length} characters even without history" ) # Remove events from the beginning of the history until the prompt fits. 
events = events[1:] - task_messages = self._render_messages( - prompt.messages, context=context, events=events - ) + if prompt.messages is not None: + task_messages = self._render_messages( + prompt.messages, context=context, events=events + ) + else: + task_messages = [] task_prompt_length = self._get_messages_text_length(task_messages) return task_messages From d91749c2e1e8098db2073ba60b8a5e8be55b310f Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 14:57:46 -0500 Subject: [PATCH 02/16] Add nemoguardrails/llm to the pyright pre-commit check --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 2e79e544d..bf31b0532 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,6 +157,7 @@ pyright = "^1.1.405" include = [ "nemoguardrails/rails/**", "nemoguardrails/actions/**", + "nemoguardrails/llm/**", "nemoguardrails/embeddings/**", "nemoguardrails/cli/**", "nemoguardrails/kb/**", From 75e60009fcf91f8a5f7a238189fd67b5902cf2c3 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:18:34 -0500 Subject: [PATCH 03/16] Fix types in nemoguardrails/rails module --- nemoguardrails/rails/llm/llmrails.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py index 96647158e..e6c4a9d5d 100644 --- a/nemoguardrails/rails/llm/llmrails.py +++ b/nemoguardrails/rails/llm/llmrails.py @@ -458,7 +458,7 @@ def _init_llms(self): (model for model in self.config.models if model.type == "main"), None ) - if main_model: + if main_model and main_model.model: kwargs = self._prepare_model_kwargs(main_model) self.llm = init_llm_model( model_name=main_model.model, @@ -489,7 +489,16 @@ def _init_llms(self): continue try: - model_name = llm_config.model + model_name = ( + llm_config.model + if llm_config.model + else llm_config.parameters["model"] + ) + if not model_name: + raise ModelInitializationError( + f"No model name provided in {llm_config}" + ) + provider_name = llm_config.engine kwargs = self._prepare_model_kwargs(llm_config) mode = llm_config.mode From 092db00230a9759d9aea580c53c385b796fd94a1 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:02:11 -0500 Subject: [PATCH 04/16] Use poetry install --all-extras --with dev to install langchain_nvidia_ai_endpoints for Github CI tests --- .github/workflows/pr-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 35652aba8..037a508d3 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -21,6 +21,7 @@ jobs: os: ${{ matrix.os }} image: ${{ matrix.image }} python-version: ${{ matrix.python-version }} + upgrade-deps: true pr-tests-summary: name: PR Tests Summary needs: pr-tests-matrix From 5ea4eff486d854926c33e5e41969ec53d1332e23 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:17:37 -0500 Subject: [PATCH 05/16] Install extras in test-coverage-report so the langchain_nvidia_ai_endpoints work for pyright type-checking --- .github/workflows/test-coverage-report.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-coverage-report.yml b/.github/workflows/test-coverage-report.yml index 30b4e1dc8..3d072edad 100644 --- 
a/.github/workflows/test-coverage-report.yml +++ b/.github/workflows/test-coverage-report.yml @@ -28,7 +28,7 @@ jobs: run: poetry config virtualenvs.in-project true - name: Install dependencies - run: poetry install --with dev + run: poetry install --with dev --all-extras - name: Run pre-commit hooks run: poetry run make pre_commit From b32e10ced5358772a0f6fef42cb464c57953f277 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:21:10 -0500 Subject: [PATCH 06/16] Remove tritonclient from type-checking (should this be deprecated? --- pyproject.toml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index bf31b0532..5c4cb5094 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -167,6 +167,11 @@ include = [ "tests/test_callbacks.py", ] +# tritonclient is only supported for Python <= 3.8, imports fail pyright-checking +exclude = [ + "nemoguardrails/llm/providers/trtllm/**" +] + [tool.poetry.group.docs] optional = true From 4f2053298b2bc6e413c5aa6f2207aa8ce65c506e Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 13 Oct 2025 16:49:46 -0500 Subject: [PATCH 07/16] Add upgrade-deps to the full-tests.yml file in Github CI/CD --- .github/workflows/full-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/full-tests.yml b/.github/workflows/full-tests.yml index 56898a41c..02b0a278f 100644 --- a/.github/workflows/full-tests.yml +++ b/.github/workflows/full-tests.yml @@ -32,6 +32,7 @@ jobs: os: ${{ matrix.os }} image: ${{ matrix.image }} python-version: ${{ matrix.python-version }} + upgrade-deps: true full-tests-summary: name: Full Tests Summary needs: full-tests-matrix From f5b5d75aeabaa68c620d76bd67fb9904a72e7414 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:10:14 -0500 Subject: [PATCH 08/16] Exclude providers/trtllm/** and providers/_langchain_nvidia_ai_endpoints_patch.py from type-checking --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5c4cb5094..4558978d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -169,7 +169,8 @@ include = [ # tritonclient is only supported for Python <= 3.8, imports fail pyright-checking exclude = [ - "nemoguardrails/llm/providers/trtllm/**" + "nemoguardrails/llm/providers/trtllm/**", + "nemoguardrails/llm/providers/_langchain_nvidia_ai_endpoints_patch.py" ] [tool.poetry.group.docs] From 8cb9bf5c10c7be409424cd17d79a93af11581953 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:12:16 -0500 Subject: [PATCH 09/16] Roll back type cleaning under llm/providers/trtllm now they're excluded from type-checking --- nemoguardrails/llm/providers/trtllm/client.py | 66 +++++++------------ nemoguardrails/llm/providers/trtllm/llm.py | 10 +-- 2 files changed, 24 insertions(+), 52 deletions(-) diff --git a/nemoguardrails/llm/providers/trtllm/client.py b/nemoguardrails/llm/providers/trtllm/client.py index 9e74d72c1..46fd2ff3f 100644 --- a/nemoguardrails/llm/providers/trtllm/client.py +++ b/nemoguardrails/llm/providers/trtllm/client.py @@ -19,25 +19,7 @@ import queue import time from functools import partial -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union - -# Try to import tritonclient dependencies, with fallbacks for type checking -try: - import tritonclient.grpc as grpcclient - from 
tritonclient.grpc.service_pb2 import ( - ModelInferResponse, # type: ignore[attr-defined] - ) - - TRITONCLIENT_AVAILABLE = True -except ImportError: - # Create dummy types when tritonclient is not available - grpcclient = Any # type: ignore - ModelInferResponse = Any # type: ignore - TRITONCLIENT_AVAILABLE = False - -if TYPE_CHECKING and not TRITONCLIENT_AVAILABLE: - import tritonclient.grpc as grpcclient # type: ignore - from tritonclient.grpc.service_pb2 import ModelInferResponse # type: ignore +from typing import Any, Dict, List, Optional, Union STOP_WORDS = [""] BAD_WORDS = [""] @@ -49,11 +31,11 @@ class TritonClient: def __init__(self, server_url: str) -> None: """Initialize the client.""" - if not TRITONCLIENT_AVAILABLE: - raise ImportError("tritonclient is required for TensorRT-LLM support") + # pylint: disable-next=import-outside-toplevel + import tritonclient.grpc as grpcclient self.server_url = server_url - self.client = grpcclient.InferenceServerClient(server_url) # type: ignore + self.client = grpcclient.InferenceServerClient(server_url) def load_model(self, model_name: str, timeout: int = 1000) -> None: """Load a model into the server.""" @@ -72,33 +54,29 @@ def load_model(self, model_name: str, timeout: int = 1000) -> None: def get_model_list(self) -> List[str]: """Get a list of models loaded in the triton server.""" res = self.client.get_model_repository_index(as_json=True) - if res is None or "models" not in res: - return [] return [model["name"] for model in res["models"]] def get_model_concurrency(self, model_name: str, timeout: int = 1000) -> int: """Get the modle concurrency.""" self.load_model(model_name, timeout) - config_result = self.client.get_model_config(model_name, as_json=True) - if config_result is None or "config" not in config_result: - return 0 - instances = config_result["config"].get("instance_group", []) + instances = self.client.get_model_config(model_name, as_json=True)["config"][ + "instance_group" + ] return sum(instance["count"] * len(instance["gpus"]) for instance in instances) @staticmethod def process_result(result: Dict[str, str]) -> Dict[str, str]: """Post-process the result from the server.""" - if not TRITONCLIENT_AVAILABLE: - raise ImportError("tritonclient is required for TensorRT-LLM support") + import google.protobuf.json_format # pylint: disable=import-outside-toplevel + import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel - import google.protobuf.json_format + # pylint: disable-next=import-outside-toplevel + from tritonclient.grpc.service_pb2 import ModelInferResponse - message = ModelInferResponse() # type: ignore[misc] + message = ModelInferResponse() google.protobuf.json_format.Parse(json.dumps(result), message) - infer_result = grpcclient.InferResult(message) # type: ignore + infer_result = grpcclient.InferResult(message) np_res = infer_result.as_numpy("OUTPUT_0") - if np_res is None: - return {"OUTPUT_0": ""} if np_res.ndim == 2: generated_text = np_res[0, 0].decode() else: @@ -162,21 +140,21 @@ def close_streaming(self) -> None: self.client.stop_stream() @staticmethod - def generate_outputs() -> List[Any]: + def generate_outputs() -> List["grpcclient.InferRequestedOutput"]: """Generate the expected output structure.""" - if not TRITONCLIENT_AVAILABLE: - raise ImportError("tritonclient is required for TensorRT-LLM support") - return [grpcclient.InferRequestedOutput("OUTPUT_0")] # type: ignore + import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel + + return 
[grpcclient.InferRequestedOutput("OUTPUT_0")] @staticmethod - def prepare_tensor(name: str, input_data: Any) -> Any: + def prepare_tensor(name: str, input_data: Any) -> "grpcclient.InferInput": """Prepare an input data structure.""" - if not TRITONCLIENT_AVAILABLE: - raise ImportError("tritonclient is required for TensorRT-LLM support") + import tritonclient.grpc as grpcclient # pylint: disable=import-outside-toplevel + # pylint: disable-next=import-outside-toplevel from tritonclient.utils import np_to_triton_dtype - t = grpcclient.InferInput( # type: ignore + t = grpcclient.InferInput( name, input_data.shape, np_to_triton_dtype(input_data.dtype) ) t.set_data_from_numpy(input_data) @@ -192,7 +170,7 @@ def generate_inputs( # pylint: disable=too-many-arguments,too-many-locals beam_width: int = 1, repetition_penalty: float = 1, length_penalty: float = 1.0, - ) -> List[Any]: + ) -> List["grpcclient.InferInput"]: """Create the input for the triton inference server.""" import numpy as np # pylint: disable=import-outside-toplevel diff --git a/nemoguardrails/llm/providers/trtllm/llm.py b/nemoguardrails/llm/providers/trtllm/llm.py index ea332d7cc..cec6a5fe1 100644 --- a/nemoguardrails/llm/providers/trtllm/llm.py +++ b/nemoguardrails/llm/providers/trtllm/llm.py @@ -18,13 +18,7 @@ import queue from functools import partial -from typing import TYPE_CHECKING, Any, Dict, List, Optional - -if TYPE_CHECKING: - try: - from tritonclient.utils import InferenceServerException - except ImportError: - InferenceServerException = Exception +from typing import Any, Dict, List, Optional from langchain.callbacks.manager import CallbackManagerForLLMRun from langchain_core.language_models.llms import BaseLLM @@ -113,7 +107,7 @@ def _llm_type(self) -> str: def _call( self, prompt: str, - stop: Optional[List[str]] = None, # pylint: disable=unused-argument + stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None, **kwargs: Any, ) -> str: From 3655b1bd92e6f61909be2f3e6c08c4a0ecb0f469 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:27:14 -0500 Subject: [PATCH 10/16] Type-clean the LFU cache implementation --- nemoguardrails/llm/cache/lfu.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/nemoguardrails/llm/cache/lfu.py b/nemoguardrails/llm/cache/lfu.py index 2c48ecb75..968aacf0e 100644 --- a/nemoguardrails/llm/cache/lfu.py +++ b/nemoguardrails/llm/cache/lfu.py @@ -54,7 +54,8 @@ def append(self, node: LFUNode) -> None: """Add node to the end of the list (before tail).""" node.prev = self.tail.prev node.next = self.tail - self.tail.prev.next = node + if self.tail.prev: + self.tail.prev.next = node self.tail.prev = node self.size += 1 @@ -67,8 +68,10 @@ def pop(self, node: Optional[LFUNode] = None) -> Optional[LFUNode]: node = self.head.next # Remove node from the list - node.prev.next = node.next - node.next.prev = node.prev + if node and node.prev: + node.prev.next = node.next + if node and node.next: + node.next.prev = node.prev self.size -= 1 return node @@ -121,6 +124,7 @@ def __init__( "evictions": 0, "puts": 0, "updates": 0, + "hit_rate": 0.0, } def _update_node_freq(self, node: LFUNode) -> None: @@ -272,7 +276,7 @@ def get_stats(self) -> dict: # Calculate hit rate total_requests = stats["hits"] + stats["misses"] - stats["hit_rate"] = ( + stats["hit_rate"] = float( stats["hits"] / total_requests if total_requests > 0 else 0.0 ) @@ -288,6 +292,7 @@ def reset_stats(self) -> None: 
"evictions": 0, "puts": 0, "updates": 0, + "hit_rate": 0.0, } def _check_and_log_stats(self) -> None: From 59c1da0a2da265b5660ef4bb3ada26cbc6ca66c4 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:38:04 -0500 Subject: [PATCH 11/16] Address Pouyan's feedback. Removed Model.model Optional and default value --- nemoguardrails/llm/helpers.py | 2 +- nemoguardrails/llm/providers/huggingface/pipeline.py | 2 +- nemoguardrails/rails/llm/config.py | 3 +-- nemoguardrails/rails/llm/llmrails.py | 11 +---------- 4 files changed, 4 insertions(+), 14 deletions(-) diff --git a/nemoguardrails/llm/helpers.py b/nemoguardrails/llm/helpers.py index 23d001e90..88488dcf2 100644 --- a/nemoguardrails/llm/helpers.py +++ b/nemoguardrails/llm/helpers.py @@ -45,7 +45,7 @@ def model_kwargs(self): These are needed to allow changes to the arguments of the LLM calls. """ if hasattr(llm_instance, "model_kwargs"): - return getattr(llm_instance, "model_kwargs") + return llm_instance.model_kwargs # type: ignore[attr-defined] (We check in line above) return {} @property diff --git a/nemoguardrails/llm/providers/huggingface/pipeline.py b/nemoguardrails/llm/providers/huggingface/pipeline.py index 918837693..b81cafb90 100644 --- a/nemoguardrails/llm/providers/huggingface/pipeline.py +++ b/nemoguardrails/llm/providers/huggingface/pipeline.py @@ -72,7 +72,7 @@ def _call( "Streaming mode not supported for HuggingFacePipeline in NeMo Guardrails!" ) - llm_result = getattr(self, "_generate")( + llm_result = self._generate( # type: ignore[attr-defined] [prompt], stop=stop, run_manager=run_manager, diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 90d24bdc7..4248b2be1 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -112,8 +112,7 @@ class Model(BaseModel): type: str engine: str - model: Optional[str] = Field( - default=None, + model: str = Field( description="The name of the model. 
If not specified, it should be specified through the parameters attribute.", ) api_key_env_var: Optional[str] = Field( diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py index e6c4a9d5d..f8b293f09 100644 --- a/nemoguardrails/rails/llm/llmrails.py +++ b/nemoguardrails/rails/llm/llmrails.py @@ -489,16 +489,7 @@ def _init_llms(self): continue try: - model_name = ( - llm_config.model - if llm_config.model - else llm_config.parameters["model"] - ) - if not model_name: - raise ModelInitializationError( - f"No model name provided in {llm_config}" - ) - + model_name = llm_config.model provider_name = llm_config.engine kwargs = self._prepare_model_kwargs(llm_config) mode = llm_config.mode From a4aad26a3903fc66b00c7a5a85c84348038feac8 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:39:11 -0500 Subject: [PATCH 12/16] fix typo --- nemoguardrails/llm/models/initializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemoguardrails/llm/models/initializer.py b/nemoguardrails/llm/models/initializer.py index bd97d03fd..fc0f4b5d6 100644 --- a/nemoguardrails/llm/models/initializer.py +++ b/nemoguardrails/llm/models/initializer.py @@ -26,7 +26,7 @@ ) -# later we can easily conver it to a class +# later we can easily convert it to a class def init_llm_model( model_name: str, provider_name: str, From 1ad6b73633584fc5a8c9a128228cd7f73ff57ab1 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:40:43 -0500 Subject: [PATCH 13/16] Revert github workflow changes (not needed now we exclude trtllm from type-checking) --- .github/workflows/full-tests.yml | 1 - .github/workflows/pr-tests.yml | 1 - .github/workflows/test-coverage-report.yml | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/full-tests.yml b/.github/workflows/full-tests.yml index 02b0a278f..56898a41c 100644 --- a/.github/workflows/full-tests.yml +++ b/.github/workflows/full-tests.yml @@ -32,7 +32,6 @@ jobs: os: ${{ matrix.os }} image: ${{ matrix.image }} python-version: ${{ matrix.python-version }} - upgrade-deps: true full-tests-summary: name: Full Tests Summary needs: full-tests-matrix diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index 037a508d3..35652aba8 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -21,7 +21,6 @@ jobs: os: ${{ matrix.os }} image: ${{ matrix.image }} python-version: ${{ matrix.python-version }} - upgrade-deps: true pr-tests-summary: name: PR Tests Summary needs: pr-tests-matrix diff --git a/.github/workflows/test-coverage-report.yml b/.github/workflows/test-coverage-report.yml index 3d072edad..30b4e1dc8 100644 --- a/.github/workflows/test-coverage-report.yml +++ b/.github/workflows/test-coverage-report.yml @@ -28,7 +28,7 @@ jobs: run: poetry config virtualenvs.in-project true - name: Install dependencies - run: poetry install --with dev --all-extras + run: poetry install --with dev - name: Run pre-commit hooks run: poetry run make pre_commit From a2b64d65e97be781e8d752927dd8d9998d250585 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:43:24 -0500 Subject: [PATCH 14/16] Remove comment from pyproject.toml --- pyproject.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4558978d8..5c743052e 100644 --- a/pyproject.toml +++ b/pyproject.toml 
@@ -166,8 +166,6 @@ include = [ "nemoguardrails/server/**", "tests/test_callbacks.py", ] - -# tritonclient is only supported for Python <= 3.8, imports fail pyright-checking exclude = [ "nemoguardrails/llm/providers/trtllm/**", "nemoguardrails/llm/providers/_langchain_nvidia_ai_endpoints_patch.py" From 93dd296a6a916b6e3d7feb55cca9d2e038040c05 Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Fri, 24 Oct 2025 12:52:08 -0500 Subject: [PATCH 15/16] Revert mandatory Model name field change, add None-guard back --- nemoguardrails/rails/llm/config.py | 3 ++- nemoguardrails/rails/llm/llmrails.py | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/nemoguardrails/rails/llm/config.py b/nemoguardrails/rails/llm/config.py index 4248b2be1..90d24bdc7 100644 --- a/nemoguardrails/rails/llm/config.py +++ b/nemoguardrails/rails/llm/config.py @@ -112,7 +112,8 @@ class Model(BaseModel): type: str engine: str - model: str = Field( + model: Optional[str] = Field( + default=None, description="The name of the model. If not specified, it should be specified through the parameters attribute.", ) api_key_env_var: Optional[str] = Field( diff --git a/nemoguardrails/rails/llm/llmrails.py b/nemoguardrails/rails/llm/llmrails.py index f8b293f09..187300aa2 100644 --- a/nemoguardrails/rails/llm/llmrails.py +++ b/nemoguardrails/rails/llm/llmrails.py @@ -490,6 +490,9 @@ def _init_llms(self): try: model_name = llm_config.model + if not model_name: + raise ValueError("LLM Config model field not set") + provider_name = llm_config.engine kwargs = self._prepare_model_kwargs(llm_config) mode = llm_config.mode From 721a71b3f90b3b7a4611e60ebb6ab731d5aa27fd Mon Sep 17 00:00:00 2001 From: tgasser-nv <200644301+tgasser-nv@users.noreply.github.com> Date: Mon, 27 Oct 2025 17:11:09 -0500 Subject: [PATCH 16/16] Address last feedback --- nemoguardrails/llm/cache/lfu.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/nemoguardrails/llm/cache/lfu.py b/nemoguardrails/llm/cache/lfu.py index 968aacf0e..755f84b17 100644 --- a/nemoguardrails/llm/cache/lfu.py +++ b/nemoguardrails/llm/cache/lfu.py @@ -54,7 +54,7 @@ def append(self, node: LFUNode) -> None: """Add node to the end of the list (before tail).""" node.prev = self.tail.prev node.next = self.tail - if self.tail.prev: + if self.tail.prev is not None: self.tail.prev.next = node self.tail.prev = node self.size += 1 @@ -68,9 +68,9 @@ def pop(self, node: Optional[LFUNode] = None) -> Optional[LFUNode]: node = self.head.next # Remove node from the list - if node and node.prev: + if node is not None and node.prev is not None: node.prev.next = node.next - if node and node.next: + if node is not None and node.next is not None: node.next.prev = node.prev self.size -= 1 @@ -276,7 +276,7 @@ def get_stats(self) -> dict: # Calculate hit rate total_requests = stats["hits"] + stats["misses"] - stats["hit_rate"] = float( + stats["hit_rate"] = ( stats["hits"] / total_requests if total_requests > 0 else 0.0 )
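
Note: two patterns recur throughout this series when satisfying pyright — wrapping an optional dependency import in a try/except with a stub fallback (as done for HuggingFacePipeline, TextStreamer, and tritonclient), and guarding Optional attributes with explicit None checks before dereferencing them (as done for the LFU cache's linked-list pointers). The sketch below is a minimal, self-contained illustration of both; `fancylib` and the `Node` class are hypothetical placeholders, not part of the patch.

    # Sketch only: the optional-import fallback and Optional-attribute guard patterns.
    from typing import Optional

    try:
        # "fancylib" stands in for an optional dependency such as transformers or tritonclient.
        from fancylib import Widget
        FANCYLIB_AVAILABLE = True
    except ImportError:
        FANCYLIB_AVAILABLE = False

        class Widget:  # type: ignore[no-redef]
            """Stub so the module still imports (and type-checks) without the dependency."""

            def __init__(self, *args, **kwargs):
                raise ImportError("fancylib is required for this feature")


    class Node:
        """Toy linked-list node with an Optional successor, mirroring LFUNode."""

        def __init__(self, value: int) -> None:
            self.value = value
            self.next: Optional["Node"] = None

        def second(self) -> Optional["Node"]:
            # Guard the Optional link before dereferencing it, as in the lfu.py fix.
            if self.next is not None:
                return self.next.next
            return None


    if __name__ == "__main__":
        a, b = Node(1), Node(2)
        a.next = b
        print(a.second())            # None, since b has no successor
        print(FANCYLIB_AVAILABLE)    # False unless the optional dependency is installed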