From 48eb24457da1c5d6e7640db8983c1b44b80595b7 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 10:42:07 +0100 Subject: [PATCH 01/51] Add PyTest-based testing infra for generic platform --- DeeployTest/conftest.py | 117 +++++++++ DeeployTest/pytest.ini | 51 ++++ DeeployTest/testUtils/pytestRunner.py | 343 ++++++++++++++++++++++++++ DeeployTest/test_generic.py | 155 ++++++++++++ pyproject.toml | 4 +- 5 files changed, 669 insertions(+), 1 deletion(-) create mode 100644 DeeployTest/conftest.py create mode 100644 DeeployTest/pytest.ini create mode 100644 DeeployTest/testUtils/pytestRunner.py create mode 100644 DeeployTest/test_generic.py diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py new file mode 100644 index 0000000000..840f678f9f --- /dev/null +++ b/DeeployTest/conftest.py @@ -0,0 +1,117 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import os +from pathlib import Path + +import coloredlogs +import pytest + +from Deeploy.Logging import DEFAULT_FMT, DEFAULT_LOGGER as log + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Native PyTest hook: add custom command-line options for Deeploy tests.""" + parser.addoption( + "--skipgen", + action="store_true", + default=False, + help="Skip network generation step", + ) + parser.addoption( + "--skipsim", + action="store_true", + default=False, + help="Skip simulation step (only generate and build)", + ) + parser.addoption( + "--toolchain", + action="store", + default="LLVM", + help="Compiler toolchain to use (LLVM or GCC)", + ) + parser.addoption( + "--toolchain-install-dir", + action="store", + default=os.environ.get("LLVM_INSTALL_DIR"), + help="Path to toolchain installation directory", + ) + parser.addoption( + "--cmake-args", + action="append", + default=[], + help="Additional CMake arguments (can be used multiple times)", + ) + +def pytest_configure(config: pytest.Config) -> None: + """Native PyTest hook: configure pytest for Deeploy tests.""" + # Register custom markers + config.addinivalue_line( + "markers", "generic: mark test as a Generic platform test" + ) + config.addinivalue_line( + "markers", "kernels: mark test as a kernel test (individual operators)" + ) + config.addinivalue_line( + "markers", "models: mark test as a model test (full networks)" + ) + config.addinivalue_line( + "markers", "slow: mark test as slow running" + ) + + # Configure logging based on verbosity + verbosity = config.option.verbose + if verbosity >= 3: + coloredlogs.install(level='DEBUG', logger=log, fmt=DEFAULT_FMT) + elif verbosity >= 2: + coloredlogs.install(level='INFO', logger=log, fmt=DEFAULT_FMT) + else: + coloredlogs.install(level='WARNING', logger=log, fmt=DEFAULT_FMT) + +@pytest.fixture(scope="session") +def deeploy_test_dir(): + """Return the DeeployTest directory path.""" + return Path(__file__).parent + +@pytest.fixture(scope="session") +def tests_dir(deeploy_test_dir): + """Return the Tests directory path.""" + return deeploy_test_dir / "Tests" + +@pytest.fixture(scope="session") +def toolchain_dir(request): + """Return the toolchain installation directory.""" + toolchain_install = request.config.getoption("--toolchain-install-dir") + if toolchain_install is None: + pytest.skip(reason="LLVM_INSTALL_DIR not set") + return toolchain_install + +@pytest.fixture(scope="session") +def ccache_dir(): + """Setup and return ccache directory.""" + ccache_path = Path("/app/.ccache") + if ccache_path.exists(): + os.environ["CCACHE_DIR"] = str(ccache_path)
+ return ccache_path + return None + +@pytest.fixture +def skipgen(request): + """Return whether to skip network generation.""" + return request.config.getoption("--skipgen") + +@pytest.fixture +def skipsim(request): + """Return whether to skip simulation.""" + return request.config.getoption("--skipsim") + +@pytest.fixture +def toolchain(request): + """Return the toolchain to use.""" + return request.config.getoption("--toolchain") + +@pytest.fixture +def cmake_args(request): + """Return additional CMake arguments.""" + return request.config.getoption("--cmake-args") diff --git a/DeeployTest/pytest.ini b/DeeployTest/pytest.ini new file mode 100644 index 0000000000..22e6ec4e45 --- /dev/null +++ b/DeeployTest/pytest.ini @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +[pytest] +# Pytest configuration for Deeploy tests + +# Test discovery patterns +python_files = test_*.py +python_classes = Test* +python_functions = test_* + +# Minimum version +minversion = 6.0 + +# Add current directory to Python path +pythonpath = . + +# Default options +addopts = + -ra + --strict-markers + --strict-config + --showlocals + +# Test output +console_output_style = progress + +# Logging +log_cli = false +log_cli_level = INFO +log_cli_format = %(levelname)s %(message)s + +# Warnings +filterwarnings = + error + ignore::DeprecationWarning + ignore::PendingDeprecationWarning + +# Markers are defined in conftest.py via pytest_configure + +# Timeout for tests (in seconds) - uncomment if pytest-timeout is installed +# timeout = 300 +# timeout_method = thread + +# Coverage options - uncomment if pytest-cov is installed +# [coverage:run] +# source = Deeploy +# omit = +# */tests/* +# */test_* diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py new file mode 100644 index 0000000000..6c46ac915f --- /dev/null +++ b/DeeployTest/testUtils/pytestRunner.py @@ -0,0 +1,343 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import os +import re +import shutil +import subprocess +from dataclasses import dataclass +from pathlib import Path +from typing import List, Literal, Optional, Tuple + +from Deeploy.Logging import DEFAULT_LOGGER as log + + +@dataclass +class DeeployTestConfig: + """Configuration for a single test case.""" + test_name: str + test_dir: str + platform: str + simulator: Literal['gvsoc', 'banshee', 'qemu', 'vsim', 'vsim.gui', 'host', 'none'] + tiling: bool + gen_dir: str + build_dir: str + toolchain: str = "LLVM" + toolchain_install_dir: Optional[str] = None + cmake_args: List[str] = None + gen_args: List[str] = None + verbose: int = 0 + debug: bool = False + + def __post_init__(self): + if self.cmake_args is None: + self.cmake_args = [] + if self.gen_args is None: + self.gen_args = [] + if self.toolchain_install_dir is None: + self.toolchain_install_dir = os.environ.get('LLVM_INSTALL_DIR') + + +@dataclass +class TestResult: + """Results from running a test.""" + success: bool + error_count: int + total_count: int + stdout: str + stderr: str = "" + runtime_cycles: Optional[int] = None + + +def get_test_paths(test_dir: str, platform: str, base_dir: Optional[str] = None) -> Tuple[str, str, str]: + """ + Args: + test_dir: Path to test directory (e.g., "Tests/Adder" or absolute path) + platform: Platform name (e.g., "Generic") + base_dir: Base directory for tests (defaults to DeeployTest/) + + Returns: + Tuple of (gen_dir, 
test_dir_abs, test_name) + """ + if base_dir is None: + # Get the absolute path of this script's parent directory (testUtils -> DeeployTest) + script_path = Path(__file__).resolve() + base_dir = script_path.parent.parent + else: + base_dir = Path(base_dir) + + test_path = Path(test_dir) + if not test_path.is_absolute(): + test_path = base_dir / test_dir + + test_path = test_path.resolve() + test_name = test_path.name + + gen_dir_name = f"TEST_{platform.upper()}" + + # Check if path is inside base_dir + try: + rel_path = test_path.relative_to(base_dir) + gen_dir = base_dir / gen_dir_name / rel_path + except ValueError: + # Path is outside base_dir + gen_dir = base_dir / gen_dir_name / test_name + log.warning(f"Test path {test_path} is outside base directory. Using {gen_dir}") + + return str(gen_dir), str(test_path), test_name + +def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: + """ + Args: + config: Test configuration + skip: If True, skip generation (useful for re-running tests) + + Raises: + RuntimeError: If network generation fails + """ + if skip: + log.info(f"Skipping network generation for {config.test_name}") + return + + script_dir = Path(__file__).parent.parent + + if config.tiling: + generation_script = script_dir / "testMVP.py" + else: + generation_script = script_dir / "generateNetwork.py" + + cmd = [ + "python", str(generation_script), + "-d", config.gen_dir, + "-t", config.test_dir, + "-p", config.platform, + ] + + # Add verbosity flags + if config.verbose > 0: + cmd.append("-" + "v" * config.verbose) + + # Add debug flag + if config.debug: + cmd.append("--debug") + + # Add additional generation arguments + cmd.extend(config.gen_args) + + log.debug(f"[pytestRunner] Generation command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + log.error(f"Network generation failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + raise RuntimeError(f"Network generation failed for {config.test_name}") + + +def configure_cmake(config: DeeployTestConfig) -> None: + """ + Args: + config: Test configuration + + Raises: + RuntimeError: If CMake configuration fails + """ + assert config.toolchain_install_dir is not None, \ + "LLVM_INSTALL_DIR environment variable not set" + + cmake_cmd = os.environ.get("CMAKE", "cmake") + if cmake_cmd == "cmake" and shutil.which("cmake") is None: + raise RuntimeError( + "CMake not found. 
Please install CMake or set CMAKE environment variable" + ) + + # Build CMake command + cmd = [ + cmake_cmd, + f"-DTOOLCHAIN={config.toolchain}", + f"-DTOOLCHAIN_INSTALL_DIR={config.toolchain_install_dir}", + f"-DGENERATED_SOURCE={config.gen_dir}", + f"-Dplatform={config.platform}", + f"-DTESTNAME={config.test_name}", + f"-B{config.build_dir}", + ] + + # Add custom CMake arguments + for arg in config.cmake_args: + if not arg.startswith("-D"): + arg = "-D" + arg + cmd.append(arg) + + # Add simulator flags + if config.simulator == 'banshee': + cmd.append("-Dbanshee_simulation=ON") + else: + cmd.append("-Dbanshee_simulation=OFF") + + if config.simulator == 'gvsoc': + cmd.append("-Dgvsoc_simulation=ON") + else: + cmd.append("-Dgvsoc_simulation=OFF") + + # Last argument is the source directory + script_dir = Path(__file__).parent.parent + cmd.append(str(script_dir.parent)) + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + log.debug(f"[pytestRunner] CMake command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + + if result.returncode != 0: + log.error(f"CMake configuration failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + raise RuntimeError(f"CMake configuration failed for {config.test_name}") + +def build_binary(config: DeeployTestConfig) -> None: + """ + Args: + config: Test configuration + + Raises: + RuntimeError: If build fails + """ + cmake_cmd = os.environ.get("CMAKE", "cmake") + + cmd = [ + cmake_cmd, + "--build", config.build_dir, + "--target", config.test_name, + ] + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + log.debug(f"[pytestRunner] Build command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + + if result.returncode != 0: + log.error(f"Build failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + raise RuntimeError(f"Build failed for {config.test_name}") + + +def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: + """ + Args: + config: Test configuration + skip: If True, skip simulation (useful for build-only tests) + + Returns: + TestResult with parsed output + + Raises: + RuntimeError: If simulation cannot be executed + """ + if skip: + log.info(f"Skipping simulation for {config.test_name}") + return TestResult(success=True, error_count=0, total_count=0, stdout="Skipped") + + if config.simulator == 'none': + raise RuntimeError("No simulator specified!") + + if config.simulator == 'host': + # Run binary directly + binary_path = Path(config.build_dir) / "bin" / config.test_name + cmd = [str(binary_path)] + else: + # Run via CMake target + cmake_cmd = os.environ.get("CMAKE", "cmake") + cmd = [ + cmake_cmd, + "--build", config.build_dir, + "--target", f"{config.simulator}_{config.test_name}", + ] + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + # Add banshee-specific logging + if config.simulator == 'banshee': + if config.verbose == 1: + env["BANSHEE_LOG"] = "warn" + elif config.verbose == 2: + env["BANSHEE_LOG"] = "info" + elif config.verbose >= 3: + env["BANSHEE_LOG"] = "debug" + + log.debug(f"[pytestRunner] Simulation command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + + # Parse output for error count + output = result.stdout + result.stderr + + # Look for "Errors: X out of Y" pattern + error_match = re.search(r'Errors:\s*(\d+)\s*out\s*of\s*(\d+)', output) + + if error_match: + error_count = 
int(error_match.group(1)) + total_count = int(error_match.group(2)) + success = (error_count == 0) + else: + # Could not parse output - treat as failure + log.warning(f"Could not parse error count from output:\n{output}") + error_count = -1 + total_count = -1 + success = False + + # Try to parse runtime cycles + runtime_cycles = None + cycle_match = re.search(r'Runtime:\s*(\d+)\s*cycles', output) + if cycle_match: + runtime_cycles = int(cycle_match.group(1)) + + return TestResult( + success=success, + error_count=error_count, + total_count=total_count, + stdout=result.stdout, + stderr=result.stderr, + runtime_cycles=runtime_cycles, + ) + +def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: bool = False) -> TestResult: + """ + Run a complete test: generate, configure, build, and simulate. + + Args: + config: Test configuration + skipgen: Skip network generation + skipsim: Skip simulation + + Returns: + TestResult with parsed output + """ + log.info(f"################## Testing {config.test_name} on {config.platform} Platform ##################") + + # Step 1: Generate network + generate_network(config, skip=skipgen) + + # Step 2: Configure CMake + configure_cmake(config) + + # Step 3: Build binary + build_binary(config) + + # Step 4: Run simulation + result = run_simulation(config, skip=skipsim) + + return result + +def get_worker_id() -> str: + """ + Get the pytest-xdist worker ID for parallel test execution. + + Returns: + Worker ID string (e.g., 'gw0', 'gw1', 'master' for non-parallel) + """ + return os.environ.get("PYTEST_XDIST_WORKER", "master") diff --git a/DeeployTest/test_generic.py b/DeeployTest/test_generic.py new file mode 100644 index 0000000000..9854e161c9 --- /dev/null +++ b/DeeployTest/test_generic.py @@ -0,0 +1,155 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import List + +import pytest + +from testUtils.pytestRunner import ( + DeeployTestConfig, + get_test_paths, + run_complete_test, + get_worker_id, +) + +KERNEL_TESTS = [ + "Adder", + "MultIO", + "test1DConvolution", + "test2DConvolution", + "test1DDWConvolution", + "test2DDWConvolution", + "test1DPad", + "test2DPad", + "testGEMM", + "testMatMul", + "testMatMulAdd", + "testMaxPool", + "testRQConv", + "testRQMatMul", + "testReduceSum", + "testReduceMean", + "testSlice", + "testRequantizedDWConv", + "test2DRequantizedConv", + "iSoftmax", + "testFloatAdder", + "testFloatGEMM", + "testFloat2DConvolution", + "testFloat2DConvolutionBias", + "testFloat2DConvolutionZeroBias", + "testFloatLayerNorm", + "testFloatDiv", + "testFloat2DDWConvolution", + "testFloat2DDWConvolutionBias", + "testFloat2DDWConvolutionZeroBias", + "testFloatRelu", + "testFloatMaxPool", + "testFloatMatmul", + "testFloatReshapeWithSkipConnection", + "testFloatSoftmax", + "testFloatTranspose", + "testFloatMul", + "testFloatPowScalar", + "testFloatPowVector", + "testFloatSqrt", + "testFloatRMSNorm", + "Quant", + "Dequant", + "QuantizedLinear", +] + +MODEL_TESTS = [ + "simpleRegression", + "WaveFormer", + "simpleCNN", + "ICCT", + "ICCT_ITA", + "ICCT_8", + "ICCT_ITA_8", + "miniMobileNet", + "miniMobileNetv2", + "CCT/CCT_1_16_16_8", + "CCT/CCT_2_32_32_128_Opset20", + "testFloatDemoTinyViT", + "Autoencoder1D", +] + +def create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args): + """ + Create DeeployTestConfig for a specific test. 
+ + Args: + test_name: Name of the test + deeploy_test_dir: Base DeeployTest directory (from fixture) + toolchain: Toolchain to use - LLVM/GCC (from fixture) + toolchain_dir: Path to toolchain installation (from fixture) + cmake_args: Additional CMake arguments (from fixture) + + Returns: + DeeployTestConfig instance + """ + platform = "Generic" + test_dir = f"Tests/{test_name}" + + gen_dir, test_dir_abs, test_name_clean = get_test_paths( + test_dir, platform, base_dir=deeploy_test_dir + ) + + worker_id = get_worker_id() + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + config = DeeployTestConfig( + test_name=test_name_clean, + test_dir=test_dir_abs, + platform=platform, + simulator="host", + tiling=False, + gen_dir=gen_dir, + build_dir=build_dir, + toolchain=toolchain, + toolchain_install_dir=toolchain_dir, + cmake_args=cmake_args, + ) + + return config + +def run_and_assert_test(test_name, config, skipgen, skipsim): + """ + Shared helper function to run a test and assert its results. + + Args: + test_name: Name of the test + config: DeeployTestConfig instance + skipgen: Whether to skip network generation + skipsim: Whether to skip simulation + """ + # Run the complete test + result = run_complete_test(config, skipgen=skipgen, skipsim=skipsim) + + # Assert results + assert result.success, ( + f"Test {test_name} failed with {result.error_count} errors out of {result.total_count}\n" + f"Output:\n{result.stdout}" + ) + + if result.error_count >= 0: # Valid parse + assert result.error_count == 0, ( + f"Found {result.error_count} errors out of {result.total_count} tests" + ) + +@pytest.mark.generic +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", KERNEL_TESTS, ids=KERNEL_TESTS) +def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) + run_and_assert_test(test_name, config, skipgen, skipsim) + +@pytest.mark.generic +@pytest.mark.models +@pytest.mark.parametrize("test_name", MODEL_TESTS, ids=MODEL_TESTS) +def test_model(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) + run_and_assert_test(test_name, config, skipgen, skipsim) diff --git a/pyproject.toml b/pyproject.toml index d807bddab4..bbe7530948 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,9 @@ dependencies = [ 'pytest', 'ortools', 'plotly', -'coloredlogs' +'coloredlogs', +'pytest', +'pytest-xdist', ] [project.urls] From f918ca28e48f5ef75951ae0544892cbb637e17a9 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 10:57:10 +0100 Subject: [PATCH 02/51] Remove nvidia-pyindex install (not needed anymore) --- Container/Dockerfile.deeploy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Container/Dockerfile.deeploy b/Container/Dockerfile.deeploy index 68f64748cc..7aa05c0378 100644 --- a/Container/Dockerfile.deeploy +++ b/Container/Dockerfile.deeploy @@ -103,7 +103,7 @@ RUN mkdir -p /root/.cargo/bin/ && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python get-pip.py && \ rm get-pip.py && \ - pip install nvidia-pyindex && \ + # pip install nvidia-pyindex && \ pip install toml-to-requirements && \ toml-to-req --toml-file pyproject.toml && \ pip install -r requirements.txt From 77b32302b22873cbd18403ed3a101d76faa76bfc Mon Sep 17 
00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 14:04:00 +0100 Subject: [PATCH 03/51] Add nvidia channel to the pip config instead of via nvidia-pyindex (depreciated) --- Container/Dockerfile.deeploy | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Container/Dockerfile.deeploy b/Container/Dockerfile.deeploy index 7aa05c0378..65265a6eeb 100644 --- a/Container/Dockerfile.deeploy +++ b/Container/Dockerfile.deeploy @@ -86,6 +86,9 @@ WORKDIR /app COPY pyproject.toml ./ +# Add nvidia channel to the pip configuration +RUN mkdir -p /etc && printf "[global]\nextra-index-url = https://pypi.ngc.nvidia.com\n" > /etc/pip.conf + # Install dependencies RUN mkdir -p /root/.cargo/bin/ && \ apt-get update && \ @@ -103,7 +106,6 @@ RUN mkdir -p /root/.cargo/bin/ && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python get-pip.py && \ rm get-pip.py && \ - # pip install nvidia-pyindex && \ pip install toml-to-requirements && \ toml-to-req --toml-file pyproject.toml && \ pip install -r requirements.txt From d245a3776de11d0484e6c4207d0ec7b1cc1304aa Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 15:16:01 +0100 Subject: [PATCH 04/51] Update banshee.patch --- toolchain/banshee.patch | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/toolchain/banshee.patch b/toolchain/banshee.patch index 6e982afcb8..b504b0b7f2 100644 --- a/toolchain/banshee.patch +++ b/toolchain/banshee.patch @@ -1,8 +1,8 @@ diff --git a/Cargo.toml b/Cargo.toml -index d406357..eb5ce8e 100644 +index d406357..f957ba6 100644 --- a/Cargo.toml +++ b/Cargo.toml -@@ -11,30 +11,32 @@ edition = "2018" +@@ -11,30 +11,33 @@ edition = "2018" build = "build/build.rs" [dependencies] @@ -26,31 +26,34 @@ index d406357..eb5ce8e 100644 -itertools = "0.9" -llvm-sys = "120" +itertools = "=0.9" -+llvm-sys = "150" ++llvm-sys = "=150" log = { version = "0.4", features = ["release_max_level_info"] } -pest = "2.1.3" -pest_derive = "2.1.0" -ndarray = "0.13" -pretty_env_logger = "0.4" -+pest = "=2.1.3" -+pest_derive = "=2.1.0" -+ndarray = "=0.13" -+pretty_env_logger = "=0.4" - regex = "~1.9.6" +-regex = "~1.9.6" -rev_slice = "0.1.5" -+rev_slice = "=0.1.5" - serde = { version = "1.0.123", features = ["derive"] } +-serde = { version = "1.0.123", features = ["derive"] } -serde_json = "1.0.63" -serde_yaml = "0.8" -termion = "2.0.3" -thiserror = "1.0.21" -to-binary = "0.4.0" ++pest = "=2.1.3" ++pest_derive = "=2.1.0" ++ndarray = "=0.13" ++pretty_env_logger = "=0.4" ++regex = "=1.9.6" ++rev_slice = "=0.1.5" ++serde = { version = "=1.0.123", features = ["derive"] } +serde_json = "=1.0.63" +serde_yaml = "=0.8" +termion = "=2.0.3" +thiserror = "=1.0.21" +to-binary = "=0.4.0" -+libc = "0.2" ++libc = "=0.2" ++quote = "=1.0.41" [build-dependencies] cc = "1.0" @@ -95,4 +98,4 @@ index 1054744..ae5ae78 100644 +static NONAME: &'static libc::c_char = unsafe { std::mem::transmute("\0".as_ptr()) }; /// Base address of the stream semantic regsiters - static SSR_BASE: u64 = 0x204800; + static SSR_BASE: u64 = 0x204800; \ No newline at end of file From 05c020a5621e091f08434c81e31bce0e5d6a3a4a Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 15:33:28 +0100 Subject: [PATCH 05/51] Fix patch --- toolchain/banshee.patch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/toolchain/banshee.patch b/toolchain/banshee.patch index b504b0b7f2..0b294bf306 100644 --- a/toolchain/banshee.patch +++ b/toolchain/banshee.patch @@ -98,4 +98,4 @@ index 1054744..ae5ae78 100644 
+static NONAME: &'static libc::c_char = unsafe { std::mem::transmute("\0".as_ptr()) }; /// Base address of the stream semantic regsiters - static SSR_BASE: u64 = 0x204800; \ No newline at end of file + static SSR_BASE: u64 = 0x204800; From ec5efd5095a4f304e0a00c562dfcf68dd5aad9cf Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 16 Dec 2025 15:59:22 +0100 Subject: [PATCH 06/51] Update banshee patch --- toolchain/banshee.patch | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/toolchain/banshee.patch b/toolchain/banshee.patch index 0b294bf306..51fdc946ce 100644 --- a/toolchain/banshee.patch +++ b/toolchain/banshee.patch @@ -1,10 +1,10 @@ diff --git a/Cargo.toml b/Cargo.toml -index d406357..f957ba6 100644 +index d406357..f14c0f7 100644 --- a/Cargo.toml +++ b/Cargo.toml -@@ -11,30 +11,33 @@ edition = "2018" +@@ -11,30 +11,34 @@ edition = "2018" build = "build/build.rs" - + [dependencies] -anyhow = "1" -binread = "2.2.0" @@ -25,9 +25,7 @@ index d406357..f957ba6 100644 flexfloat = { path = "flexfloat" } -itertools = "0.9" -llvm-sys = "120" -+itertools = "=0.9" -+llvm-sys = "=150" - log = { version = "0.4", features = ["release_max_level_info"] } +-log = { version = "0.4", features = ["release_max_level_info"] } -pest = "2.1.3" -pest_derive = "2.1.0" -ndarray = "0.13" @@ -40,6 +38,9 @@ index d406357..f957ba6 100644 -termion = "2.0.3" -thiserror = "1.0.21" -to-binary = "0.4.0" ++itertools = "=0.9" ++llvm-sys = "=150" ++log = { version = "=0.4.17", features = ["release_max_level_info"] } +pest = "=2.1.3" +pest_derive = "=2.1.0" +ndarray = "=0.13" @@ -54,9 +55,10 @@ index d406357..f957ba6 100644 +to-binary = "=0.4.0" +libc = "=0.2" +quote = "=1.0.41" - + [build-dependencies] cc = "1.0" ++cmake = "=0.1.50" diff --git a/build/runtime.rs b/build/runtime.rs index 04f80b8..c03f248 100644 --- a/build/runtime.rs @@ -72,16 +74,16 @@ index 04f80b8..c03f248 100644 .status() .unwrap(); diff --git a/src/engine.rs b/src/engine.rs -index 216996b..e5abe38 100644 +index f32a539..a79a708 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -281,7 +281,6 @@ impl Engine { - + LLVMPassManagerBuilderPopulateFunctionPassManager(builder, func_passes); LLVMAddAnalysisPasses(tm, module_passes); - LLVMPassManagerBuilderPopulateLTOPassManager(builder, module_passes, 0, 1); LLVMPassManagerBuilderPopulateModulePassManager(builder, module_passes); - + // Create and run the function pass manager. 
diff --git a/src/tran.rs b/src/tran.rs index 1054744..ae5ae78 100644 @@ -93,9 +95,9 @@ index 1054744..ae5ae78 100644 }; +use libc; extern crate flexfloat; - + -static NONAME: &'static i8 = unsafe { std::mem::transmute("\0".as_ptr()) }; +static NONAME: &'static libc::c_char = unsafe { std::mem::transmute("\0".as_ptr()) }; - + /// Base address of the stream semantic regsiters static SSR_BASE: u64 = 0x204800; From 3760ced198d7ddc832fefebbf4aab5c20cb6543c Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Wed, 17 Dec 2025 11:06:00 +0100 Subject: [PATCH 07/51] Update CI docker --- .github/workflows/_select-env.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_select-env.yml b/.github/workflows/_select-env.yml index 1085c7eaa1..ba9c9d0c83 100644 --- a/.github/workflows/_select-env.yml +++ b/.github/workflows/_select-env.yml @@ -34,7 +34,7 @@ jobs: elif [[ "${{ github.ref_name }}" == "main" ]]; then IMAGE="ghcr.io/pulp-platform/deeploy:main" else - IMAGE="ghcr.io/pulp-platform/deeploy:devel" + IMAGE="ghcr.io/victor-jung/deeploy:pytest-migration" fi echo "image=${IMAGE}" >> "$GITHUB_OUTPUT" From b051153be567bbb31986d93358fab745767366c9 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Wed, 17 Dec 2025 11:13:09 +0100 Subject: [PATCH 08/51] Update generic platform CI to use PyTest --- .github/workflows/_runner-generic.yml | 10 +--- .github/workflows/ci-platform-generic.yml | 61 +---------------------- 2 files changed, 4 insertions(+), 67 deletions(-) diff --git a/.github/workflows/_runner-generic.yml b/.github/workflows/_runner-generic.yml index 50fa0f3a3b..24853af57e 100644 --- a/.github/workflows/_runner-generic.yml +++ b/.github/workflows/_runner-generic.yml @@ -14,7 +14,7 @@ name: _runner-generic docker-image: required: true type: string - test-names: + pytest-marker: required: true type: string @@ -38,14 +38,8 @@ jobs: key: ccache-ci - name: Run Test run: | - testNames="${{ inputs.test-names }}" cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test: $testName" - python testRunner_generic.py -t Tests/$testName - fi - done + pytest test_generic.py -v -n 4 -m "${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/ci-platform-generic.yml b/.github/workflows/ci-platform-generic.yml index fb39a9bd53..83c191180f 100644 --- a/.github/workflows/ci-platform-generic.yml +++ b/.github/workflows/ci-platform-generic.yml @@ -35,51 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - MultIO - test1DConvolution - test2DConvolution - test1DDWConvolution - test2DDWConvolution - test1DPad - test2DPad - testGEMM - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testRQMatMul - testReduceSum - testReduceMean - testSlice - testRequantizedDWConv - test2DRequantizedConv - iSoftmax - testFloatAdder - testFloatGEMM - testFloat2DConvolution - testFloat2DConvolutionBias - testFloat2DConvolutionZeroBias - testFloatLayerNorm - testFloatDiv - testFloat2DDWConvolution - testFloat2DDWConvolutionBias - testFloat2DDWConvolutionZeroBias - testFloatRelu - testFloatMaxPool - testFloatMatmul - testFloatReshapeWithSkipConnection - testFloatSoftmax - testFloatTranspose - testFloatMul - testFloatPowScalar - testFloatPowVector - testFloatSqrt - testFloatRMSNorm - Quant - Dequant - QuantizedLinear + pytest-marker: "kernels" generic-models: needs: select-env @@ -87,17 
+43,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - simpleRegression - WaveFormer - simpleCNN - ICCT - ICCT_ITA - ICCT_8 - ICCT_ITA_8 - miniMobileNet - miniMobileNetv2 - CCT/CCT_1_16_16_8 - CCT/CCT_2_32_32_128_Opset20 - testFloatDemoTinyViT - Autoencoder1D + pytest-marker: "models" From eb6e31e429950c7f16403111971cd0c88dfb186c Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Wed, 17 Dec 2025 11:18:46 +0100 Subject: [PATCH 09/51] Lint and format --- DeeployTest/conftest.py | 76 ++++++------- DeeployTest/testUtils/pytestRunner.py | 150 ++++++++++++++------------ DeeployTest/test_generic.py | 67 +++++------- 3 files changed, 148 insertions(+), 145 deletions(-) diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 840f678f9f..d42a370968 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -8,86 +8,84 @@ import coloredlogs import pytest -from Deeploy.Logging import DEFAULT_FMT, DEFAULT_LOGGER as log +from Deeploy.Logging import DEFAULT_FMT +from Deeploy.Logging import DEFAULT_LOGGER as log def pytest_addoption(parser: pytest.Parser) -> None: """Native PyTest hook: add custom command-line options for Deeploy tests.""" parser.addoption( "--skipgen", - action="store_true", - default=False, - help="Skip network generation step", + action = "store_true", + default = False, + help = "Skip network generation step", ) parser.addoption( "--skipsim", - action="store_true", - default=False, - help="Skip simulation step (only generate and build)", + action = "store_true", + default = False, + help = "Skip simulation step (only generate and build)", ) parser.addoption( "--toolchain", - action="store", - default="LLVM", - help="Compiler toolchain to use (LLVM or GCC)", + action = "store", + default = "LLVM", + help = "Compiler toolchain to use (LLVM or GCC)", ) parser.addoption( "--toolchain-install-dir", - action="store", - default=os.environ.get("LLVM_INSTALL_DIR"), - help="Path to toolchain installation directory", + action = "store", + default = os.environ.get("LLVM_INSTALL_DIR"), + help = "Path to toolchain installation directory", ) parser.addoption( "--cmake-args", - action="append", - default=[], - help="Additional CMake arguments (can be used multiple times)", + action = "append", + default = [], + help = "Additional CMake arguments (can be used multiple times)", ) + def pytest_configure(config: pytest.Config) -> None: """Native PyTest hook: configure pytest for Deeploy tests.""" # Register custom markers - config.addinivalue_line( - "markers", "generic: mark test as a Generic platform test" - ) - config.addinivalue_line( - "markers", "kernels: mark test as a kernel test (individual operators)" - ) - config.addinivalue_line( - "markers", "models: mark test as a model test (full networks)" - ) - config.addinivalue_line( - "markers", "slow: mark test as slow running" - ) - + config.addinivalue_line("markers", "generic: mark test as a Generic platform test") + config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") + config.addinivalue_line("markers", "models: mark test as a model test (full networks)") + config.addinivalue_line("markers", "slow: mark test as slow running") + # Configure logging based on verbosity verbosity = config.option.verbose if verbosity >= 3: - coloredlogs.install(level='DEBUG', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'DEBUG', logger = log, fmt = DEFAULT_FMT) elif verbosity >= 2: - 
coloredlogs.install(level='INFO', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'INFO', logger = log, fmt = DEFAULT_FMT) else: - coloredlogs.install(level='WARNING', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'WARNING', logger = log, fmt = DEFAULT_FMT) + -@pytest.fixture(scope="session") +@pytest.fixture(scope = "session") def deeploy_test_dir(): """Return the DeeployTest directory path.""" return Path(__file__).parent -@pytest.fixture(scope="session") + +@pytest.fixture(scope = "session") def tests_dir(deeploy_test_dir): """Return the Tests directory path.""" return deeploy_test_dir / "Tests" -@pytest.fixture(scope="session") + +@pytest.fixture(scope = "session") def toolchain_dir(request): """Return the toolchain installation directory.""" toolchain_install = request.config.getoption("--toolchain-install-dir") if toolchain_install is None: - pytest.skip(reason="LLVM_INSTALL_DIR not set") + pytest.skip(reason = "LLVM_INSTALL_DIR not set") return toolchain_install -@pytest.fixture(scope="session") + +@pytest.fixture(scope = "session") def ccache_dir(): """Setup and return ccache directory.""" ccache_path = Path("/app/.ccache") @@ -96,21 +94,25 @@ def ccache_dir(): return ccache_path return None + @pytest.fixture def skipgen(request): """Return whether to skip network generation.""" return request.config.getoption("--skipgen") + @pytest.fixture def skipsim(request): """Return whether to skip simulation.""" return request.config.getoption("--skipsim") + @pytest.fixture def toolchain(request): """Return the toolchain to use.""" return request.config.getoption("--toolchain") + @pytest.fixture def cmake_args(request): """Return additional CMake arguments.""" diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 6c46ac915f..e0d876f6c1 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -29,7 +29,7 @@ class DeeployTestConfig: gen_args: List[str] = None verbose: int = 0 debug: bool = False - + def __post_init__(self): if self.cmake_args is None: self.cmake_args = [] @@ -66,16 +66,16 @@ def get_test_paths(test_dir: str, platform: str, base_dir: Optional[str] = None) base_dir = script_path.parent.parent else: base_dir = Path(base_dir) - + test_path = Path(test_dir) if not test_path.is_absolute(): test_path = base_dir / test_dir - + test_path = test_path.resolve() test_name = test_path.name - + gen_dir_name = f"TEST_{platform.upper()}" - + # Check if path is inside base_dir try: rel_path = test_path.relative_to(base_dir) @@ -84,9 +84,10 @@ def get_test_paths(test_dir: str, platform: str, base_dir: Optional[str] = None) # Path is outside base_dir gen_dir = base_dir / gen_dir_name / test_name log.warning(f"Test path {test_path} is outside base directory. 
Using {gen_dir}") - + return str(gen_dir), str(test_path), test_name + def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: """ Args: @@ -99,36 +100,40 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: if skip: log.info(f"Skipping network generation for {config.test_name}") return - + script_dir = Path(__file__).parent.parent - + if config.tiling: generation_script = script_dir / "testMVP.py" else: generation_script = script_dir / "generateNetwork.py" - + cmd = [ - "python", str(generation_script), - "-d", config.gen_dir, - "-t", config.test_dir, - "-p", config.platform, + "python", + str(generation_script), + "-d", + config.gen_dir, + "-t", + config.test_dir, + "-p", + config.platform, ] - + # Add verbosity flags if config.verbose > 0: cmd.append("-" + "v" * config.verbose) - + # Add debug flag if config.debug: cmd.append("--debug") - + # Add additional generation arguments cmd.extend(config.gen_args) - + log.debug(f"[pytestRunner] Generation command: {' '.join(cmd)}") - - result = subprocess.run(cmd, capture_output=True, text=True) - + + result = subprocess.run(cmd, capture_output = True, text = True) + if result.returncode != 0: log.error(f"Network generation failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") raise RuntimeError(f"Network generation failed for {config.test_name}") @@ -144,13 +149,11 @@ def configure_cmake(config: DeeployTestConfig) -> None: """ assert config.toolchain_install_dir is not None, \ "LLVM_INSTALL_DIR environment variable not set" - + cmake_cmd = os.environ.get("CMAKE", "cmake") if cmake_cmd == "cmake" and shutil.which("cmake") is None: - raise RuntimeError( - "CMake not found. Please install CMake or set CMAKE environment variable" - ) - + raise RuntimeError("CMake not found. 
Please install CMake or set CMAKE environment variable") + # Build CMake command cmd = [ cmake_cmd, @@ -161,40 +164,41 @@ def configure_cmake(config: DeeployTestConfig) -> None: f"-DTESTNAME={config.test_name}", f"-B{config.build_dir}", ] - + # Add custom CMake arguments for arg in config.cmake_args: if not arg.startswith("-D"): arg = "-D" + arg cmd.append(arg) - + # Add simulator flags if config.simulator == 'banshee': cmd.append("-Dbanshee_simulation=ON") else: cmd.append("-Dbanshee_simulation=OFF") - + if config.simulator == 'gvsoc': cmd.append("-Dgvsoc_simulation=ON") else: cmd.append("-Dgvsoc_simulation=OFF") - + # Last argument is the source directory script_dir = Path(__file__).parent.parent cmd.append(str(script_dir.parent)) - + env = os.environ.copy() if config.verbose >= 3: env["VERBOSE"] = "1" - + log.debug(f"[pytestRunner] CMake command: {' '.join(cmd)}") - - result = subprocess.run(cmd, capture_output=True, text=True, env=env) - + + result = subprocess.run(cmd, capture_output = True, text = True, env = env) + if result.returncode != 0: log.error(f"CMake configuration failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") raise RuntimeError(f"CMake configuration failed for {config.test_name}") + def build_binary(config: DeeployTestConfig) -> None: """ Args: @@ -204,21 +208,23 @@ def build_binary(config: DeeployTestConfig) -> None: RuntimeError: If build fails """ cmake_cmd = os.environ.get("CMAKE", "cmake") - + cmd = [ cmake_cmd, - "--build", config.build_dir, - "--target", config.test_name, + "--build", + config.build_dir, + "--target", + config.test_name, ] - + env = os.environ.copy() if config.verbose >= 3: env["VERBOSE"] = "1" - + log.debug(f"[pytestRunner] Build command: {' '.join(cmd)}") - - result = subprocess.run(cmd, capture_output=True, text=True, env=env) - + + result = subprocess.run(cmd, capture_output = True, text = True, env = env) + if result.returncode != 0: log.error(f"Build failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") raise RuntimeError(f"Build failed for {config.test_name}") @@ -238,11 +244,11 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: """ if skip: log.info(f"Skipping simulation for {config.test_name}") - return TestResult(success=True, error_count=0, total_count=0, stdout="Skipped") - + return TestResult(success = True, error_count = 0, total_count = 0, stdout = "Skipped") + if config.simulator == 'none': raise RuntimeError("No simulator specified!") - + if config.simulator == 'host': # Run binary directly binary_path = Path(config.build_dir) / "bin" / config.test_name @@ -252,14 +258,16 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: cmake_cmd = os.environ.get("CMAKE", "cmake") cmd = [ cmake_cmd, - "--build", config.build_dir, - "--target", f"{config.simulator}_{config.test_name}", + "--build", + config.build_dir, + "--target", + f"{config.simulator}_{config.test_name}", ] - + env = os.environ.copy() if config.verbose >= 3: env["VERBOSE"] = "1" - + # Add banshee-specific logging if config.simulator == 'banshee': if config.verbose == 1: @@ -268,17 +276,17 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: env["BANSHEE_LOG"] = "info" elif config.verbose >= 3: env["BANSHEE_LOG"] = "debug" - + log.debug(f"[pytestRunner] Simulation command: {' '.join(cmd)}") - - result = subprocess.run(cmd, capture_output=True, text=True, env=env) - + + result = subprocess.run(cmd, capture_output = True, text = True, env = env) + # Parse output 
for error count output = result.stdout + result.stderr - + # Look for "Errors: X out of Y" pattern error_match = re.search(r'Errors:\s*(\d+)\s*out\s*of\s*(\d+)', output) - + if error_match: error_count = int(error_match.group(1)) total_count = int(error_match.group(2)) @@ -289,22 +297,23 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: error_count = -1 total_count = -1 success = False - + # Try to parse runtime cycles runtime_cycles = None cycle_match = re.search(r'Runtime:\s*(\d+)\s*cycles', output) if cycle_match: runtime_cycles = int(cycle_match.group(1)) - + return TestResult( - success=success, - error_count=error_count, - total_count=total_count, - stdout=result.stdout, - stderr=result.stderr, - runtime_cycles=runtime_cycles, + success = success, + error_count = error_count, + total_count = total_count, + stdout = result.stdout, + stderr = result.stderr, + runtime_cycles = runtime_cycles, ) + def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: bool = False) -> TestResult: """ Run a complete test: generate, configure, build, and simulate. @@ -318,21 +327,22 @@ def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: TestResult with parsed output """ log.info(f"################## Testing {config.test_name} on {config.platform} Platform ##################") - + # Step 1: Generate network - generate_network(config, skip=skipgen) - + generate_network(config, skip = skipgen) + # Step 2: Configure CMake configure_cmake(config) - + # Step 3: Build binary build_binary(config) - + # Step 4: Run simulation - result = run_simulation(config, skip=skipsim) - + result = run_simulation(config, skip = skipsim) + return result + def get_worker_id() -> str: """ Get the pytest-xdist worker ID for parallel test execution. diff --git a/DeeployTest/test_generic.py b/DeeployTest/test_generic.py index 9854e161c9..0a192fd7d6 100644 --- a/DeeployTest/test_generic.py +++ b/DeeployTest/test_generic.py @@ -3,16 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 from pathlib import Path -from typing import List import pytest - -from testUtils.pytestRunner import ( - DeeployTestConfig, - get_test_paths, - run_complete_test, - get_worker_id, -) +from testUtils.pytestRunner import DeeployTestConfig, get_test_paths, get_worker_id, run_complete_test KERNEL_TESTS = [ "Adder", @@ -77,6 +70,7 @@ "Autoencoder1D", ] + def create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args): """ Create DeeployTestConfig for a specific test. 
@@ -93,29 +87,28 @@ def create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cm """ platform = "Generic" test_dir = f"Tests/{test_name}" - - gen_dir, test_dir_abs, test_name_clean = get_test_paths( - test_dir, platform, base_dir=deeploy_test_dir - ) - + + gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) + worker_id = get_worker_id() build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") - + config = DeeployTestConfig( - test_name=test_name_clean, - test_dir=test_dir_abs, - platform=platform, - simulator="host", - tiling=False, - gen_dir=gen_dir, - build_dir=build_dir, - toolchain=toolchain, - toolchain_install_dir=toolchain_dir, - cmake_args=cmake_args, + test_name = test_name_clean, + test_dir = test_dir_abs, + platform = platform, + simulator = "host", + tiling = False, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = toolchain, + toolchain_install_dir = toolchain_dir, + cmake_args = cmake_args, ) - + return config + def run_and_assert_test(test_name, config, skipgen, skipsim): """ Shared helper function to run a test and assert its results. @@ -127,29 +120,27 @@ def run_and_assert_test(test_name, config, skipgen, skipsim): skipsim: Whether to skip simulation """ # Run the complete test - result = run_complete_test(config, skipgen=skipgen, skipsim=skipsim) - + result = run_complete_test(config, skipgen = skipgen, skipsim = skipsim) + # Assert results - assert result.success, ( - f"Test {test_name} failed with {result.error_count} errors out of {result.total_count}\n" - f"Output:\n{result.stdout}" - ) - + assert result.success, (f"Test {test_name} failed with {result.error_count} errors out of {result.total_count}\n" + f"Output:\n{result.stdout}") + if result.error_count >= 0: # Valid parse - assert result.error_count == 0, ( - f"Found {result.error_count} errors out of {result.total_count} tests" - ) - + assert result.error_count == 0, (f"Found {result.error_count} errors out of {result.total_count} tests") + + @pytest.mark.generic @pytest.mark.kernels -@pytest.mark.parametrize("test_name", KERNEL_TESTS, ids=KERNEL_TESTS) +@pytest.mark.parametrize("test_name", KERNEL_TESTS, ids = KERNEL_TESTS) def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) run_and_assert_test(test_name, config, skipgen, skipsim) + @pytest.mark.generic @pytest.mark.models -@pytest.mark.parametrize("test_name", MODEL_TESTS, ids=MODEL_TESTS) +@pytest.mark.parametrize("test_name", MODEL_TESTS, ids = MODEL_TESTS) def test_model(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) run_and_assert_test(test_name, config, skipgen, skipsim) From 42c9cd76fea81125be4affcf003100beb4d318d9 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Wed, 17 Dec 2025 11:39:20 +0100 Subject: [PATCH 10/51] Cleanup pytest.ini --- DeeployTest/pytest.ini | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/DeeployTest/pytest.ini b/DeeployTest/pytest.ini index 22e6ec4e45..8ff98ae2dd 100644 --- a/DeeployTest/pytest.ini +++ b/DeeployTest/pytest.ini @@ -36,16 +36,3 @@ filterwarnings = error ignore::DeprecationWarning ignore::PendingDeprecationWarning - -# Markers are defined in conftest.py via pytest_configure - -# Timeout 
for tests (in seconds) - uncomment if pytest-timeout is installed -# timeout = 300 -# timeout_method = thread - -# Coverage options - uncomment if pytest-cov is installed -# [coverage:run] -# source = Deeploy -# omit = -# */tests/* -# */test_* From 89efc39afb942605918f867a95f4ae7eaad3198e Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 18 Dec 2025 08:56:20 +0100 Subject: [PATCH 11/51] Apply Calin's comments --- .github/workflows/_runner-generic.yml | 2 +- DeeployTest/test_generic.py | 6 ++++++ pyproject.toml | 1 - 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runner-generic.yml b/.github/workflows/_runner-generic.yml index 24853af57e..f9e4a796f0 100644 --- a/.github/workflows/_runner-generic.yml +++ b/.github/workflows/_runner-generic.yml @@ -36,7 +36,7 @@ jobs: with: path: /app/.ccache key: ccache-ci - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. run: | cd DeeployTest mkdir -p /app/.ccache diff --git a/DeeployTest/test_generic.py b/DeeployTest/test_generic.py index 0a192fd7d6..2571482073 100644 --- a/DeeployTest/test_generic.py +++ b/DeeployTest/test_generic.py @@ -130,6 +130,12 @@ def run_and_assert_test(test_name, config, skipgen, skipsim): assert result.error_count == 0, (f"Found {result.error_count} errors out of {result.total_count} tests") +### Markers summary ### +# generic: tests from the generic platform +# kernels: single kernel (or single layer) tests +# models: full model (multiple layer) tests + + @pytest.mark.generic @pytest.mark.kernels @pytest.mark.parametrize("test_name", KERNEL_TESTS, ids = KERNEL_TESTS) diff --git a/pyproject.toml b/pyproject.toml index bbe7530948..0dda1a55b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,6 @@ dependencies = [ 'mako', 'argparse', 'toml', -'pytest', 'ortools', 'plotly', 'coloredlogs', From 1929253c39c834ca4686a4a6c3a44617ed6087de Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 18 Dec 2025 11:23:10 +0100 Subject: [PATCH 12/51] Refactor to better support multiple platforms and add cortexm to pytest suite --- DeeployTest/conftest.py | 1 + DeeployTest/testUtils/pytestRunner.py | 71 ++++++++++++ DeeployTest/test_cortexm_config.py | 24 ++++ DeeployTest/test_generic.py | 156 +++----------------------- DeeployTest/test_generic_config.py | 68 +++++++++++ DeeployTest/test_platforms.py | 119 ++++++++++++++++++++ 6 files changed, 297 insertions(+), 142 deletions(-) create mode 100644 DeeployTest/test_cortexm_config.py create mode 100644 DeeployTest/test_generic_config.py create mode 100644 DeeployTest/test_platforms.py diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index d42a370968..6f75fea279 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -50,6 +50,7 @@ def pytest_configure(config: pytest.Config) -> None: """Native PyTest hook: configure pytest for Deeploy tests.""" # Register custom markers config.addinivalue_line("markers", "generic: mark test as a Generic platform test") + config.addinivalue_line("markers", "cortexm: mark test as a Cortex-M (QEMU-ARM) platform test") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") config.addinivalue_line("markers", "models: mark test as a model test (full networks)") config.addinivalue_line("markers", "slow: mark test as slow running") diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index e0d876f6c1..5619bdf573 100644 --- 
a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -351,3 +351,74 @@ def get_worker_id() -> str: Worker ID string (e.g., 'gw0', 'gw1', 'master' for non-parallel) """ return os.environ.get("PYTEST_XDIST_WORKER", "master") + + +def create_test_config( + test_name: str, + platform: str, + simulator: Literal['gvsoc', 'banshee', 'qemu', 'vsim', 'vsim.gui', 'host', 'none'], + deeploy_test_dir: str, + toolchain: str, + toolchain_dir: Optional[str], + cmake_args: List[str], + tiling: bool = False, +) -> DeeployTestConfig: + """ + Create DeeployTestConfig for a specific test and platform. + + Args: + test_name: Name of the test + platform: Target platform (e.g., "Generic", "QEMU-ARM") + simulator: Simulator to use + deeploy_test_dir: Base DeeployTest directory + toolchain: Toolchain to use - LLVM/GCC + toolchain_dir: Path to toolchain installation + cmake_args: Additional CMake arguments + tiling: Whether to use tiling + + Returns: + DeeployTestConfig instance + """ + test_dir = f"Tests/{test_name}" + + gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) + + worker_id = get_worker_id() + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + config = DeeployTestConfig( + test_name = test_name_clean, + test_dir = test_dir_abs, + platform = platform, + simulator = simulator, + tiling = tiling, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = toolchain, + toolchain_install_dir = toolchain_dir, + cmake_args = cmake_args, + ) + + return config + + +def run_and_assert_test(test_name: str, config: DeeployTestConfig, skipgen: bool, skipsim: bool) -> None: + """ + Shared helper function to run a test and assert its results. + + Args: + test_name: Name of the test + config: DeeployTestConfig instance + skipgen: Whether to skip network generation + skipsim: Whether to skip simulation + + Raises: + AssertionError: If test fails or has errors + """ + result = run_complete_test(config, skipgen = skipgen, skipsim = skipsim) + + assert result.success, (f"Test {test_name} failed with {result.error_count} errors out of {result.total_count}\n" + f"Output:\n{result.stdout}") + + if result.error_count >= 0: + assert result.error_count == 0, (f"Found {result.error_count} errors out of {result.total_count} tests") diff --git a/DeeployTest/test_cortexm_config.py b/DeeployTest/test_cortexm_config.py new file mode 100644 index 0000000000..c5b2b14103 --- /dev/null +++ b/DeeployTest/test_cortexm_config.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Cortex-M (QEMU-ARM) platform.""" + +KERNEL_TESTS = [ + "Adder", + "MultIO", + "test1DPad", + "test2DPad", + "testMatMul", + "testMatMulAdd", + "testMaxPool", + "testRQConv", + "testReduceSum", + "testReduceMean", + "testSlice", +] + +MODEL_TESTS = [ + "simpleRegression", + "WaveFormer", +] diff --git a/DeeployTest/test_generic.py b/DeeployTest/test_generic.py index 2571482073..afb12ee1b9 100644 --- a/DeeployTest/test_generic.py +++ b/DeeployTest/test_generic.py @@ -2,151 +2,23 @@ # # SPDX-License-Identifier: Apache-2.0 -from pathlib import Path +""" +Legacy test file for Generic platform (DEPRECATED). -import pytest -from testUtils.pytestRunner import DeeployTestConfig, get_test_paths, get_worker_id, run_complete_test +This file is kept for backwards compatibility but will be removed in the future. 
+Please use test_platforms.py instead, which supports multiple platforms. -KERNEL_TESTS = [ - "Adder", - "MultIO", - "test1DConvolution", - "test2DConvolution", - "test1DDWConvolution", - "test2DDWConvolution", - "test1DPad", - "test2DPad", - "testGEMM", - "testMatMul", - "testMatMulAdd", - "testMaxPool", - "testRQConv", - "testRQMatMul", - "testReduceSum", - "testReduceMean", - "testSlice", - "testRequantizedDWConv", - "test2DRequantizedConv", - "iSoftmax", - "testFloatAdder", - "testFloatGEMM", - "testFloat2DConvolution", - "testFloat2DConvolutionBias", - "testFloat2DConvolutionZeroBias", - "testFloatLayerNorm", - "testFloatDiv", - "testFloat2DDWConvolution", - "testFloat2DDWConvolutionBias", - "testFloat2DDWConvolutionZeroBias", - "testFloatRelu", - "testFloatMaxPool", - "testFloatMatmul", - "testFloatReshapeWithSkipConnection", - "testFloatSoftmax", - "testFloatTranspose", - "testFloatMul", - "testFloatPowScalar", - "testFloatPowVector", - "testFloatSqrt", - "testFloatRMSNorm", - "Quant", - "Dequant", - "QuantizedLinear", -] +To run only Generic platform tests: + pytest -m generic -MODEL_TESTS = [ - "simpleRegression", - "WaveFormer", - "simpleCNN", - "ICCT", - "ICCT_ITA", - "ICCT_8", - "ICCT_ITA_8", - "miniMobileNet", - "miniMobileNetv2", - "CCT/CCT_1_16_16_8", - "CCT/CCT_2_32_32_128_Opset20", - "testFloatDemoTinyViT", - "Autoencoder1D", -] +To run Generic kernel tests: + pytest -m "generic and kernels" +To run Generic model tests: + pytest -m "generic and models" +""" -def create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args): - """ - Create DeeployTestConfig for a specific test. - - Args: - test_name: Name of the test - deeploy_test_dir: Base DeeployTest directory (from fixture) - toolchain: Toolchain to use - LLVM/GCC (from fixture) - toolchain_dir: Path to toolchain installation (from fixture) - cmake_args: Additional CMake arguments (from fixture) - - Returns: - DeeployTestConfig instance - """ - platform = "Generic" - test_dir = f"Tests/{test_name}" +# Import all test functions from the new centralized test file +from test_platforms import test_generic_kernels, test_generic_models - gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) - - worker_id = get_worker_id() - build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") - - config = DeeployTestConfig( - test_name = test_name_clean, - test_dir = test_dir_abs, - platform = platform, - simulator = "host", - tiling = False, - gen_dir = gen_dir, - build_dir = build_dir, - toolchain = toolchain, - toolchain_install_dir = toolchain_dir, - cmake_args = cmake_args, - ) - - return config - - -def run_and_assert_test(test_name, config, skipgen, skipsim): - """ - Shared helper function to run a test and assert its results. 
- - Args: - test_name: Name of the test - config: DeeployTestConfig instance - skipgen: Whether to skip network generation - skipsim: Whether to skip simulation - """ - # Run the complete test - result = run_complete_test(config, skipgen = skipgen, skipsim = skipsim) - - # Assert results - assert result.success, (f"Test {test_name} failed with {result.error_count} errors out of {result.total_count}\n" - f"Output:\n{result.stdout}") - - if result.error_count >= 0: # Valid parse - assert result.error_count == 0, (f"Found {result.error_count} errors out of {result.total_count} tests") - - -### Markers summary ### -# generic: tests from the generic platform -# kernels: single kernel (or single layer) tests -# models: full model (multiple layer) tests - - -@pytest.mark.generic -@pytest.mark.kernels -@pytest.mark.parametrize("test_name", KERNEL_TESTS, ids = KERNEL_TESTS) -def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) - run_and_assert_test(test_name, config, skipgen, skipsim) - - -@pytest.mark.generic -@pytest.mark.models -@pytest.mark.parametrize("test_name", MODEL_TESTS, ids = MODEL_TESTS) -def test_model(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - config = create_test_config(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args) - run_and_assert_test(test_name, config, skipgen, skipsim) +__all__ = ["test_generic_kernels", "test_generic_models"] diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py new file mode 100644 index 0000000000..fa7f525550 --- /dev/null +++ b/DeeployTest/test_generic_config.py @@ -0,0 +1,68 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Generic platform.""" + +KERNEL_TESTS = [ + "Adder", + "MultIO", + "test1DConvolution", + "test2DConvolution", + "test1DDWConvolution", + "test2DDWConvolution", + "test1DPad", + "test2DPad", + "testGEMM", + "testMatMul", + "testMatMulAdd", + "testMaxPool", + "testRQConv", + "testRQMatMul", + "testReduceSum", + "testReduceMean", + "testSlice", + "testRequantizedDWConv", + "test2DRequantizedConv", + "iSoftmax", + "testFloatAdder", + "testFloatGEMM", + "testFloat2DConvolution", + "testFloat2DConvolutionBias", + "testFloat2DConvolutionZeroBias", + "testFloatLayerNorm", + "testFloatDiv", + "testFloat2DDWConvolution", + "testFloat2DDWConvolutionBias", + "testFloat2DDWConvolutionZeroBias", + "testFloatRelu", + "testFloatMaxPool", + "testFloatMatmul", + "testFloatReshapeWithSkipConnection", + "testFloatSoftmax", + "testFloatTranspose", + "testFloatMul", + "testFloatPowScalar", + "testFloatPowVector", + "testFloatSqrt", + "testFloatRMSNorm", + "Quant", + "Dequant", + "QuantizedLinear", +] + +MODEL_TESTS = [ + "simpleRegression", + "WaveFormer", + "simpleCNN", + "ICCT", + "ICCT_ITA", + "ICCT_8", + "ICCT_ITA_8", + "miniMobileNet", + "miniMobileNetv2", + "CCT/CCT_1_16_16_8", + "CCT/CCT_2_32_32_128_Opset20", + "testFloatDemoTinyViT", + "Autoencoder1D", +] diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py new file mode 100644 index 0000000000..7b5a49af65 --- /dev/null +++ b/DeeployTest/test_platforms.py @@ -0,0 +1,119 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Central test file for all platforms. 
+ +This file defines the test functions with markers for all supported platforms. +Each platform's test lists are imported from their respective config files. +""" + +import pytest +from testUtils.pytestRunner import create_test_config, run_and_assert_test + +# Import platform-specific test configurations +from test_generic_config import KERNEL_TESTS as GENERIC_KERNEL_TESTS +from test_generic_config import MODEL_TESTS as GENERIC_MODEL_TESTS +from test_cortexm_config import KERNEL_TESTS as CORTEXM_KERNEL_TESTS +from test_cortexm_config import MODEL_TESTS as CORTEXM_MODEL_TESTS + + +### Platform Configuration ### +PLATFORM_CONFIGS = { + "generic": { + "platform": "Generic", + "simulator": "host", + "kernel_tests": GENERIC_KERNEL_TESTS, + "model_tests": GENERIC_MODEL_TESTS, + }, + "cortexm": { + "platform": "QEMU-ARM", + "simulator": "qemu", + "kernel_tests": CORTEXM_KERNEL_TESTS, + "model_tests": CORTEXM_MODEL_TESTS, + }, +} + + +### Markers summary ### +# generic: tests from the generic platform +# cortexm: tests from the cortex-m (QEMU-ARM) platform +# kernels: single kernel (or single layer) tests +# models: full model (multiple layer) tests + + +@pytest.mark.generic +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", GENERIC_KERNEL_TESTS, ids = GENERIC_KERNEL_TESTS) +def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Generic platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["generic"] + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.generic +@pytest.mark.models +@pytest.mark.parametrize("test_name", GENERIC_MODEL_TESTS, ids = GENERIC_MODEL_TESTS) +def test_generic_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Generic platform model tests.""" + platform_config = PLATFORM_CONFIGS["generic"] + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.cortexm +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", CORTEXM_KERNEL_TESTS, ids = CORTEXM_KERNEL_TESTS) +def test_cortexm_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Cortex-M platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["cortexm"] + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.cortexm +@pytest.mark.models +@pytest.mark.parametrize("test_name", CORTEXM_MODEL_TESTS, ids = CORTEXM_MODEL_TESTS) +def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Cortex-M platform model tests.""" + 
platform_config = PLATFORM_CONFIGS["cortexm"] + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) From 393dd9d2b52778b2c19af8ecaf2e67188f7f932d Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 18 Dec 2025 19:14:42 +0100 Subject: [PATCH 13/51] Add PyTest suite for Siracusa and Siracusa Tiled --- Deeploy/TilingExtension/TilerExtension.py | 29 ++- DeeployTest/conftest.py | 6 + DeeployTest/testMVP.py | 11 +- DeeployTest/testUtils/pytestRunner.py | 78 ++++++- DeeployTest/test_platforms.py | 265 +++++++++++++++++++++- DeeployTest/test_siracusa_config.py | 62 +++++ DeeployTest/test_siracusa_tiled_config.py | 107 +++++++++ 7 files changed, 536 insertions(+), 22 deletions(-) create mode 100644 DeeployTest/test_siracusa_config.py create mode 100644 DeeployTest/test_siracusa_tiled_config.py diff --git a/Deeploy/TilingExtension/TilerExtension.py b/Deeploy/TilingExtension/TilerExtension.py index 27ca222e4e..b4fb4a626c 100644 --- a/Deeploy/TilingExtension/TilerExtension.py +++ b/Deeploy/TilingExtension/TilerExtension.py @@ -52,7 +52,7 @@ class Tiler(): _MINIMALLOC_OUTPUT_FILENAME = "output_minimalloc" # Initialize with the list of TemplateTCFbinding - def __init__(self, memoryHierarchy: MemoryHierarchy): + def __init__(self, memoryHierarchy: MemoryHierarchy, testName: Optional[str] = None, workDir: Optional[str] = None): self.memoryHierarchy = memoryHierarchy self.tilerModel: Optional[TilerModel] = None @@ -66,6 +66,23 @@ def __init__(self, memoryHierarchy: MemoryHierarchy): self.memoryAllocStrategy: Literal["TetrisRandom", "TetrisCo-Opt", "MiniMalloc"] = "TetrisRandom" self.searchStrategy: Literal["min", "max", "random-max"] = "random-max" + if workDir is not None: + os.makedirs(workDir, exist_ok = True) + minimalloc_base = os.path.join(workDir, self._MINIMALLOC_INPUT_FILENAME) + minimalloc_output_base = os.path.join(workDir, self._MINIMALLOC_OUTPUT_FILENAME) + else: + minimalloc_base = self._MINIMALLOC_INPUT_FILENAME + minimalloc_output_base = self._MINIMALLOC_OUTPUT_FILENAME + + if testName is not None: + # VJUNG: Sanitize path + safe_test_name = testName.replace("/", "_").replace("\\", "_") + self._minimalloc_input = f"{minimalloc_base}_{safe_test_name}" + self._minimalloc_output = f"{minimalloc_output_base}_{safe_test_name}" + else: + self._minimalloc_input = minimalloc_base + self._minimalloc_output = minimalloc_output_base + @property def worstCaseBufferSize(self): return self._worstCaseBufferSize @@ -238,7 +255,7 @@ def _convertCtxtToStaticSchedule(self, ctxt: NetworkContext, def minimalloc(self, memoryMap, ctxt, nodeMemoryConstraint, capacity: int, memoryLevel: str): - with open(f"{self._MINIMALLOC_INPUT_FILENAME}.csv", mode = "w", newline = "") as file: + with open(f"{self._minimalloc_input}.csv", mode = "w", newline = "") as file: writer = csv.writer(file, lineterminator = "\n") writer.writerow(["id", "lower", "upper", "size"]) for memoryBlock in memoryMap: @@ -273,7 +290,7 @@ def minimalloc(self, memoryMap, ctxt, nodeMemoryConstraint, capacity: int, memor minimallocOutput = subprocess.run([ f"{minimallocInstallDir}/minimalloc", f"--capacity={capacity}", - f"--input={self._MINIMALLOC_INPUT_FILENAME}.csv", f"--output={self._MINIMALLOC_OUTPUT_FILENAME}.csv" + f"--input={self._minimalloc_input}.csv", 
f"--output={self._minimalloc_output}.csv" ], capture_output = True, text = True) @@ -284,7 +301,7 @@ def minimalloc(self, memoryMap, ctxt, nodeMemoryConstraint, capacity: int, memor ) raise subprocess.CalledProcessError(minimallocOutput.returncode, " ".join(minimallocOutput.args)) - with open(f"{self._MINIMALLOC_OUTPUT_FILENAME}.csv", mode = "r", newline = "") as file: + with open(f"{self._minimalloc_output}.csv", mode = "r", newline = "") as file: reader = csv.reader(file) header = next(reader) for row in reader: @@ -944,11 +961,11 @@ def testMemoryMapCorrectness(self, memoryMap: Dict[str, List[List[MemoryBlock]]] class TilerDeployerWrapper(NetworkDeployerWrapper): - def __init__(self, deployer: Union[MemoryLevelAwareDeployer, MemoryDeployerWrapper], tilerCls: Type[Tiler] = Tiler): + def __init__(self, deployer: Union[MemoryLevelAwareDeployer, MemoryDeployerWrapper], tilerCls: Type[Tiler] = Tiler, testName: Optional[str] = None, workDir: Optional[str] = None): super().__init__(deployer) assert isinstance(self.Platform, (MemoryPlatform, MemoryPlatformWrapper)), \ f"Platform should be a MemoryPlatform or MemoryPlatformWrapper! Got {type(self.Platform).__name__}" - self.tiler = tilerCls(self.Platform.memoryHierarchy) + self.tiler = tilerCls(self.Platform.memoryHierarchy, testName = testName, workDir = workDir) @property def worstCaseBufferSize(self): diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 6f75fea279..c671152d4f 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -51,8 +51,14 @@ def pytest_configure(config: pytest.Config) -> None: # Register custom markers config.addinivalue_line("markers", "generic: mark test as a Generic platform test") config.addinivalue_line("markers", "cortexm: mark test as a Cortex-M (QEMU-ARM) platform test") + config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") + config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") config.addinivalue_line("markers", "models: mark test as a model test (full networks)") + config.addinivalue_line("markers", "singlebuffer: mark test as single-buffer configuration") + config.addinivalue_line("markers", "doublebuffer: mark test as double-buffer configuration") + config.addinivalue_line("markers", "l2: mark test as L2 default memory level") + config.addinivalue_line("markers", "l3: mark test as L3 default memory level") config.addinivalue_line("markers", "slow: mark test as slow running") # Configure logging based on verbosity diff --git a/DeeployTest/testMVP.py b/DeeployTest/testMVP.py index 4b1ebef20b..5c81be8a06 100644 --- a/DeeployTest/testMVP.py +++ b/DeeployTest/testMVP.py @@ -5,6 +5,7 @@ import argparse import os import sys +import hashlib from collections import OrderedDict from typing import List, Tuple @@ -115,14 +116,18 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg deployer = MemoryDeployerWrapper(deployer, memoryLevelAnnotationPasses) # Make the deployer tiler aware + # VJUNG: Create unique ID for the IO files of minimalloc and prevent conflict in case of parallel execution + unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}_DB{args.doublebuffer}" + testIdentifier = hashlib.md5(unique_params.encode()).hexdigest()[:16] + if args.doublebuffer: assert args.defaultMemLevel in ["L3", "L2"] if args.defaultMemLevel == "L3": - deployer = 
TilerDeployerWrapper(deployer, DBOnlyL3Tiler) + deployer = TilerDeployerWrapper(deployer, DBOnlyL3Tiler, testName = testIdentifier, workDir = args.dumpdir) else: - deployer = TilerDeployerWrapper(deployer, DBTiler) + deployer = TilerDeployerWrapper(deployer, DBTiler, testName = testIdentifier, workDir = args.dumpdir) else: - deployer = TilerDeployerWrapper(deployer, SBTiler) + deployer = TilerDeployerWrapper(deployer, SBTiler, testName = testIdentifier, workDir = args.dumpdir) deployer.tiler.visualizeMemoryAlloc = args.plotMemAlloc deployer.tiler.memoryAllocStrategy = args.memAllocStrategy diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 5619bdf573..08e7949353 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -6,6 +6,7 @@ import re import shutil import subprocess +import sys from dataclasses import dataclass from pathlib import Path from typing import List, Literal, Optional, Tuple @@ -132,10 +133,10 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: log.debug(f"[pytestRunner] Generation command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output = True, text = True) + result = subprocess.run(cmd, check = False) if result.returncode != 0: - log.error(f"Network generation failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + log.error(f"Network generation failed with return code {result.returncode}") raise RuntimeError(f"Network generation failed for {config.test_name}") @@ -192,10 +193,10 @@ def configure_cmake(config: DeeployTestConfig) -> None: log.debug(f"[pytestRunner] CMake command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output = True, text = True, env = env) + result = subprocess.run(cmd, check = False, env = env) if result.returncode != 0: - log.error(f"CMake configuration failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + log.error(f"CMake configuration failed with return code {result.returncode}") raise RuntimeError(f"CMake configuration failed for {config.test_name}") @@ -223,10 +224,10 @@ def build_binary(config: DeeployTestConfig) -> None: log.debug(f"[pytestRunner] Build command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output = True, text = True, env = env) + result = subprocess.run(cmd, check = False, env = env) if result.returncode != 0: - log.error(f"Build failed:\nSTDOUT:\n{result.stdout}\nSTDERR:\n{result.stderr}") + log.error(f"Build failed with return code {result.returncode}") raise RuntimeError(f"Build failed for {config.test_name}") @@ -281,6 +282,12 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: result = subprocess.run(cmd, capture_output = True, text = True, env = env) + # Print captured output so it's visible when running with pytest -s + if result.stdout: + print(result.stdout, end = '') + if result.stderr: + print(result.stderr, end = '', file = sys.stderr) + # Parse output for error count output = result.stdout + result.stderr @@ -362,19 +369,39 @@ def create_test_config( toolchain_dir: Optional[str], cmake_args: List[str], tiling: bool = False, + cores: Optional[int] = None, + l1: Optional[int] = None, + l2: int = 1024000, + default_mem_level: str = "L2", + double_buffer: bool = False, + mem_alloc_strategy: str = "MiniMalloc", + search_strategy: str = "random-max", + profile_tiling: bool = False, + plot_mem_alloc: bool = False, + randomized_mem_scheduler: bool = False, ) -> DeeployTestConfig: """ Create DeeployTestConfig for a 
specific test and platform. Args: test_name: Name of the test - platform: Target platform (e.g., "Generic", "QEMU-ARM") + platform: Target platform (e.g., "Generic", "QEMU-ARM", "Siracusa") simulator: Simulator to use deeploy_test_dir: Base DeeployTest directory toolchain: Toolchain to use - LLVM/GCC toolchain_dir: Path to toolchain installation cmake_args: Additional CMake arguments tiling: Whether to use tiling + cores: Number of cores (for Siracusa platforms) + l1: L1 memory size in bytes (for tiled platforms) + l2: L2 memory size in bytes (default: 1024000) + default_mem_level: Default memory level ("L2" or "L3") + double_buffer: Enable double buffering + mem_alloc_strategy: Memory allocation strategy + search_strategy: CP solver search strategy + profile_tiling: Enable tiling profiling + plot_mem_alloc: Enable memory allocation plotting + randomized_mem_scheduler: Enable randomized memory scheduler Returns: DeeployTestConfig instance @@ -384,7 +411,39 @@ def create_test_config( gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) worker_id = get_worker_id() - build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + # VJUNG: Build dir has to be unique for each worker to prevent conflict + build_suffix = Path(gen_dir).name + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}_{build_suffix}") + + cmake_args_list = list(cmake_args) if cmake_args else [] + if cores is not None: + cmake_args_list.append(f"NUM_CORES={cores}") + + gen_args_list = [] + + if cores is not None and platform in ["Siracusa", "Siracusa_w_neureka"]: + gen_args_list.append(f"--cores={cores}") + + if tiling: + if l1 is not None: + gen_args_list.append(f"--l1={l1}") + if l2 != 1024000: + gen_args_list.append(f"--l2={l2}") + if default_mem_level != "L2": + gen_args_list.append(f"--defaultMemLevel={default_mem_level}") + if double_buffer: + gen_args_list.append("--doublebuffer") + if mem_alloc_strategy != "MiniMalloc": + gen_args_list.append(f"--memAllocStrategy={mem_alloc_strategy}") + if search_strategy != "random-max": + gen_args_list.append(f"--searchStrategy={search_strategy}") + if profile_tiling: + gen_args_list.append("--profileTiling") + if plot_mem_alloc: + gen_args_list.append("--plotMemAlloc") + if randomized_mem_scheduler: + gen_args_list.append("--randomizedMemoryScheduler") config = DeeployTestConfig( test_name = test_name_clean, @@ -396,7 +455,8 @@ def create_test_config( build_dir = build_dir, toolchain = toolchain, toolchain_install_dir = toolchain_dir, - cmake_args = cmake_args, + cmake_args = cmake_args_list, + gen_args = gen_args_list, ) return config diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 7b5a49af65..c94e63f857 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -17,6 +17,40 @@ from test_generic_config import MODEL_TESTS as GENERIC_MODEL_TESTS from test_cortexm_config import KERNEL_TESTS as CORTEXM_KERNEL_TESTS from test_cortexm_config import MODEL_TESTS as CORTEXM_MODEL_TESTS +from test_siracusa_config import KERNEL_TESTS as SIRACUSA_KERNEL_TESTS +from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS +from test_siracusa_config import DEFAULT_CORES as SIRACUSA_DEFAULT_CORES +from test_siracusa_tiled_config import ( + L2_SINGLEBUFFER_KERNELS, + L2_DOUBLEBUFFER_KERNELS, + L2_SINGLEBUFFER_MODELS, + L3_SINGLEBUFFER_MODELS, + L3_DOUBLEBUFFER_MODELS, +) + + +def generate_test_params(test_dict, 
config_name): + """ + Generate test parameters from a dictionary of test names to L1 values. + + Args: + test_dict: Dictionary mapping test_name -> list of L1 values + config_name: Configuration name for test ID (e.g., "L2-singlebuffer") + + Returns: + List of (test_name, l1_value, config_name) tuples + """ + params = [] + for test_name, l1_values in test_dict.items(): + for l1 in l1_values: + params.append((test_name, l1, config_name)) + return params + + +def param_id(param): + """Generate test ID from parameter tuple.""" + test_name, l1, config = param + return f"{test_name}-{l1}-{config}" ### Platform Configuration ### @@ -37,10 +71,19 @@ ### Markers summary ### -# generic: tests from the generic platform -# cortexm: tests from the cortex-m (QEMU-ARM) platform -# kernels: single kernel (or single layer) tests -# models: full model (multiple layer) tests +# Platform markers: +# generic: tests from the generic platform +# cortexm: tests from the cortex-m (QEMU-ARM) platform +# siracusa: tests from the Siracusa platform (untiled) +# siracusa_tiled: tests from the Siracusa platform (tiled) +# Test type markers: +# kernels: single kernel (or single layer) tests +# models: full model (multiple layer) tests +# Configuration markers (tiled platforms): +# singlebuffer: single-buffer tests +# doublebuffer: double-buffer tests +# l2: L2 default memory level +# l3: L3 default memory level @pytest.mark.generic @@ -117,3 +160,217 @@ def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c tiling = False, ) run_and_assert_test(test_name, config, skipgen, skipsim) + + +### Siracusa Platform Tests ### + + +@pytest.mark.siracusa +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", SIRACUSA_KERNEL_TESTS, ids = SIRACUSA_KERNEL_TESTS) +def test_siracusa_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Siracusa platform kernel tests (untiled).""" + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + cores = SIRACUSA_DEFAULT_CORES, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa +@pytest.mark.models +@pytest.mark.parametrize("test_name", SIRACUSA_MODEL_TESTS, ids = SIRACUSA_MODEL_TESTS) +def test_siracusa_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Siracusa platform model tests (untiled).""" + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + cores = SIRACUSA_DEFAULT_CORES, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +### Siracusa Tiled Platform Tests ### + + +@pytest.mark.siracusa_tiled +@pytest.mark.kernels +@pytest.mark.singlebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), + ids = param_id, +) +def test_siracusa_tiled_kernels_l2_singlebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled kernel tests (L2, single-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = 
"Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_tiled +@pytest.mark.kernels +@pytest.mark.doublebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L2_DOUBLEBUFFER_KERNELS, "L2-doublebuffer"), + ids = param_id, +) +def test_siracusa_tiled_kernels_l2_doublebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled kernel tests (L2, double-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_tiled +@pytest.mark.models +@pytest.mark.singlebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L2_SINGLEBUFFER_MODELS, "L2-singlebuffer"), + ids = param_id, +) +def test_siracusa_tiled_models_l2_singlebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled model tests (L2, single-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_tiled +@pytest.mark.models +@pytest.mark.singlebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L3_SINGLEBUFFER_MODELS, "L3-singlebuffer"), + ids = param_id, +) +def test_siracusa_tiled_models_l3_singlebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled model tests (L3, single-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_tiled +@pytest.mark.models +@pytest.mark.doublebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", 
+ generate_test_params(L3_DOUBLEBUFFER_MODELS, "L3-doublebuffer"), + ids = param_id, +) +def test_siracusa_tiled_models_l3_doublebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled model tests (L3, double-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) diff --git a/DeeployTest/test_siracusa_config.py b/DeeployTest/test_siracusa_config.py new file mode 100644 index 0000000000..371cb273e3 --- /dev/null +++ b/DeeployTest/test_siracusa_config.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +PLATFORM_NAME = "Siracusa" +SIMULATOR = "gvsoc" +DEFAULT_CORES = 8 + +KERNEL_TESTS = [ + "Adder", + "MultIO", + "test1DPad", + "test2DPad", + "testMatMul", + "testMatMulAdd", + "testRequantizedDWConv", + "test2DRequantizedConv", + "iSoftmax", + "testConcat", + "testRMSNorm", + "trueIntegerDivSandwich", + "Hardswish", + "RQHardswish", + "testBacktracking", + "testFloatAdder", + "testFloatGEMM", + "testFloat2DConvolution", + "testFloat2DConvolutionBias", + "testFloat2DConvolutionZeroBias", + "testFloat2DDWConvolution", + "testFloat2DDWConvolutionBias", + "testFloat2DDWConvolutionZeroBias", + "testFloatLayerNorm", + "testFloatRelu", + "testFloatMaxPool", + "testFloatMatmul", + "testFloatSoftmax", + "testFloatTranspose", + "testFloatMul", + "Quant", + "Dequant", + "testFloatReduceSum", + "testFloatReshapeWithSkipConnection", + "testFloatSoftmaxGrad", + "testFloatSoftmaxCrossEntropy", + "testFloatSoftmaxCrossEntropyGrad", + "QuantizedLinear", +] + +MODEL_TESTS = [ + "simpleRegression", + "miniMobileNet", + "miniMobileNetv2", + "Attention", + "MLPerf/KeywordSpotting", + "MLPerf/ImageClassification", + "MLPerf/AnomalyDetection", + "CCT/CCT_1_16_16_8", + "CCT/CCT_2_32_32_128_Opset20", + "testFloatDemoTinyViT", +] diff --git a/DeeployTest/test_siracusa_tiled_config.py b/DeeployTest/test_siracusa_tiled_config.py new file mode 100644 index 0000000000..4f69998f56 --- /dev/null +++ b/DeeployTest/test_siracusa_tiled_config.py @@ -0,0 +1,107 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +PLATFORM_NAME = "Siracusa" +SIMULATOR = "gvsoc" +DEFAULT_CORES = 8 +DEFAULT_L2 = 1024000 +DEFAULT_MEM_ALLOC_STRATEGY = "MiniMalloc" +DEFAULT_SEARCH_STRATEGY = "random-max" + +L2_SINGLEBUFFER_KERNELS = { + "testMatMul": [64000, 32000, 16000], + "test2DRequantizedConv": [8000, 6000, 4000], + "test2DRequantizedStriddedPaddedConv": [600], + "testRequantizedDWConv": [2561], + "iSoftmax": [800, 500, 300], + "testConcat": [32000, 16000, 8000], + "testRMSNorm": [2048, 1024, 512], + "Hardswish": [750], + "RQHardswish": [750], + "testFloatGEMM": [8000], + "testFloat2DConvolution": [1600], + "testFloat2DConvolutionBias": [6600], + "testFloat2DConvolutionZeroBias": [6600], + "testFloat2DDWConvolution": [7200], + "testFloat2DDWConvolutionBias": [7200], + "testFloat2DDWConvolutionZeroBias": [7200], + "testFloatLayerNorm": [2000], + "testFloatMaxPool": [2000], + "testFloatMatmul": [2000], + "testFloatRelu": [2000], 
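+    # Values are L1 memory budgets in bytes; create_test_config forwards each one
+    # to the network generator as --l1=<value>.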
+ "testFloatReshapeWithSkipConnection": [1400], + "testFloatSoftmax": [4000], + "testFloatTranspose": [2000], + "testFloatMul": [2000], + "largeFloatAdd": [220000], + "testRQGEMMwBatch": [20000], + "testMatMulBatch": [20000], +} + +L2_DOUBLEBUFFER_KERNELS = { + "testMatMul": [64000, 32000, 16000], + "test2DRequantizedConv": [8000, 6000, 5000], + "testRequantizedDWConv": [5121], + "iSoftmax": [1600, 1000, 600], + "testConcat": [64000, 32000, 16000], + "testRMSNorm": [4096, 2048, 1024], + "Hardswish": [750], + "RQHardswish": [800], + "testFloatGEMM": [8000], + "testFloat2DConvolution": [2000], + "testFloat2DConvolutionBias": [8800], + "testFloat2DConvolutionZeroBias": [8800], + "testFloat2DDWConvolution": [9800], + "testFloat2DDWConvolutionBias": [10000], + "testFloat2DDWConvolutionZeroBias": [9800], + "testFloatLayerNorm": [2000], + "testFloatMaxPool": [5000], + "testFloatMatmul": [5000], + "testFloatRelu": [20], + "testFloatReshapeWithSkipConnection": [2600], + "testFloatSoftmax": [8000], + "testFloatTranspose": [2000], + "testFloatMul": [2000], +} + +L2_SINGLEBUFFER_MODELS = { + "simpleRegression": [45000, 30000, 15000], + "miniMobileNet": [60000, 12000, 6000, 3000], + "miniMobileNetv2": [60000, 16000, 12000, 8000], + "Attention": [60000, 10000, 5000], + "microLlama/microLlama1": [60000, 10000, 5000], + "microLlama/microLlama8": [60000, 10000, 5000], + "microLlama/microLlama8_parallel": [60000, 10000, 5000], + "MLPerf/KeywordSpotting": [64000], + "MLPerf/ImageClassification": [64000], + "MLPerf/AnomalyDetection": [64000], + "CCT/CCT_1_16_16_8": [64000], + "testFloatDemoTinyViT": [4000], +} + +L3_SINGLEBUFFER_MODELS = { + "simpleRegression": [45000, 30000, 16000], + "miniMobileNet": [60000, 12000, 6000], + "miniMobileNetv2": [60000, 16000, 12000, 8000], + "Attention": [60000, 10000, 5000, 2500], + "Transformer": [60000, 30000, 15000], + "microLlama/microLlama1": [60000, 10000, 5000], + "CCT/CCT_2_32_32_128": [128000], + "testTrainCCT/CCT2_FT2": [128000], + "testFloatDemoTinyViT": [4000], +} + +L3_DOUBLEBUFFER_MODELS = { + "simpleRegression": [60000, 45000, 30000], + "miniMobileNet": [60000, 24000, 12000, 6000], + "miniMobileNetv2": [60000, 32000, 24000, 16000], + "Attention": [60000, 20000, 10000, 5000], + "Transformer": [60000, 30000, 15000], + "microLlama/microLlama1": [60000, 20000, 10000], + "microLlama/microLlama8": [60000, 20000, 10000], + "microLlama/microLlama8_parallel": [60000, 20000, 10000], + "CCT/CCT_2_32_32_128": [128000], + "testTrainCCT/CCT2_FT2": [128000], + "testFloatDemoTinyViT": [4000], +} From 54f3e9cdf2190e8b70efffffb949fb4d45754263 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 10:11:37 +0100 Subject: [PATCH 14/51] Alpha version of CI suite using PyTest for Siracusa and Siracusa Tiled --- .../_runner-siracusa-tiled-kernels.yml | 40 +++ .../_runner-siracusa-tiled-models.yml | 49 ++++ .../_runner-siracusa-tiled-sequential.yml | 21 +- .github/workflows/_runner-siracusa-tiled.yml | 17 +- .github/workflows/_runner-siracusa.yml | 16 +- .../workflows/ci-platform-siracusa-tiled.yml | 236 ++++++------------ .github/workflows/ci-platform-siracusa.yml | 57 +---- DeeployTest/test_generic.py | 24 -- DeeployTest/test_platforms.py | 7 - DeeployTest/test_siracusa_tiled_config.py | 15 ++ scripts/generate_test_matrix.py | 57 +++++ 11 files changed, 264 insertions(+), 275 deletions(-) create mode 100644 .github/workflows/_runner-siracusa-tiled-kernels.yml create mode 100644 .github/workflows/_runner-siracusa-tiled-models.yml delete mode 100644 
DeeployTest/test_generic.py create mode 100755 scripts/generate_test_matrix.py diff --git a/.github/workflows/_runner-siracusa-tiled-kernels.yml b/.github/workflows/_runner-siracusa-tiled-kernels.yml new file mode 100644 index 0000000000..08288e84d4 --- /dev/null +++ b/.github/workflows/_runner-siracusa-tiled-kernels.yml @@ -0,0 +1,40 @@ +name: Siracusa Tiled Kernels Runner + +on: + workflow_call: + inputs: + runner: + required: true + type: string + docker-image: + required: true + type: string + memory-level: + required: true + type: string + description: 'Memory level marker (l2 or l3)' + buffer-mode: + required: true + type: string + description: 'Buffer mode marker (singlebuffer or doublebuffer)' + +jobs: + run-tests: + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.docker-image }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Deeploy + run: | + pip install -e . + + - name: Run kernel tests + run: | + cd DeeployTest + pytest test_platforms.py -m "siracusa_tiled and kernels and ${{ inputs.memory-level }} and ${{ inputs.buffer-mode }}" -v diff --git a/.github/workflows/_runner-siracusa-tiled-models.yml b/.github/workflows/_runner-siracusa-tiled-models.yml new file mode 100644 index 0000000000..b0341f0eb0 --- /dev/null +++ b/.github/workflows/_runner-siracusa-tiled-models.yml @@ -0,0 +1,49 @@ +name: Siracusa Tiled Models Runner + +on: + workflow_call: + inputs: + runner: + required: true + type: string + docker-image: + required: true + type: string + test-name: + required: true + type: string + description: 'Test name to run' + memory-level: + required: true + type: string + description: 'Memory level marker (l2 or l3)' + buffer-mode: + required: true + type: string + description: 'Buffer mode marker (singlebuffer or doublebuffer)' + +jobs: + run-test: + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.docker-image }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Install Deeploy + run: | + pip install -e . + + - name: Run model test with retry + uses: nick-fields/retry@v3 + with: + timeout_minutes: 10 + max_attempts: 3 + retry_on: error + command: | + cd DeeployTest + pytest test_platforms.py -k "${{ inputs.test-name }}-" -m "siracusa_tiled and models and ${{ inputs.memory-level }} and ${{ inputs.buffer-mode }}" -v diff --git a/.github/workflows/_runner-siracusa-tiled-sequential.yml b/.github/workflows/_runner-siracusa-tiled-sequential.yml index 056d8f0398..18a6ef8e0a 100644 --- a/.github/workflows/_runner-siracusa-tiled-sequential.yml +++ b/.github/workflows/_runner-siracusa-tiled-sequential.yml @@ -51,11 +51,6 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . 
- - name: Install jq - run: | - export DEBIAN_FRONTEND=noninteractive - apt-get update -y - apt-get install -y jq - name: Cache ccache uses: actions/cache/restore@v4 with: @@ -64,16 +59,14 @@ jobs: - name: Run Tests run: | cd DeeployTest - echo '${{ inputs.tests-config }}' > tests.json mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - jq -c '.[]' tests.json | while read test; do - testName=$(echo "$test" | jq -r '.name') - L1_values=$(echo "$test" | jq -r '.L1[]') - for L1_value in $L1_values; do - echo "Running test: $testName with L1: $L1_value" - python testRunner_tiled_siracusa.py -t Tests/$testName --cores=${{ inputs.num-cores }} --l1 $L1_value --defaultMemLevel=${{ inputs.default-memory-level }} ${{ inputs.double-buffer && '--doublebuffer' || '' }} --memAllocStrategy=${{ inputs.memory-allocation-strategy }} --searchStrategy=${{ inputs.search-strategy }} - done - done + # Determine buffer mode and memory level for pytest markers + BUFFER_MARKER="${{ inputs.double-buffer && 'doublebuffer' || 'singlebuffer' }}" + MEMLEVEL_MARKER="${{ inputs.default-memory-level == 'L3' && 'l3' || 'l2' }}" + + # Run all kernel tests matching the buffer and memory level configuration + echo "Running Siracusa tiled kernel tests (${MEMLEVEL_MARKER}, ${BUFFER_MARKER})" + pytest test_platforms.py::test_siracusa_tiled_kernels_${MEMLEVEL_MARKER}_${BUFFER_MARKER} -v shell: bash diff --git a/.github/workflows/_runner-siracusa-tiled.yml b/.github/workflows/_runner-siracusa-tiled.yml index f33836c6b8..ed8285273f 100644 --- a/.github/workflows/_runner-siracusa-tiled.yml +++ b/.github/workflows/_runner-siracusa-tiled.yml @@ -74,5 +74,20 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - python testRunner_tiled_siracusa.py -t Tests/${{ inputs.test-name }} --cores=${{ inputs.num-cores }} --l1 ${{ matrix.L1 }} --defaultMemLevel=${{ inputs.default-memory-level }} ${{ inputs.double-buffer && '--doublebuffer' || '' }} --memAllocStrategy=${{ inputs.memory-allocation-strategy }} --searchStrategy=${{ inputs.search-strategy }} + + # Determine buffer mode and memory level for pytest markers + BUFFER_MARKER="${{ inputs.double-buffer && 'doublebuffer' || 'singlebuffer' }}" + MEMLEVEL_MARKER="${{ inputs.default-memory-level == 'L3' && 'l3' || 'l2' }}" + + # Determine if it's a kernel or model test + TEST_TYPE="kernels" + if [[ "${{ inputs.test-name }}" == *"/"* ]] || [[ "${{ inputs.test-name }}" =~ (simpleRegression|MobileNet|Attention|Transformer|Llama|MLPerf|CCT|TinyViT) ]]; then + TEST_TYPE="models" + fi + + # Build test ID pattern: testname-L1value-config + TEST_PATTERN="${{ inputs.test-name }}-${{ matrix.L1 }}-${{ inputs.default-memory-level }}-${BUFFER_MARKER}" + + # Run pytest with specific test matching the pattern + pytest test_platforms.py::test_siracusa_tiled_${TEST_TYPE}_${MEMLEVEL_MARKER}_${BUFFER_MARKER} -k "$TEST_PATTERN" -v shell: bash diff --git a/.github/workflows/_runner-siracusa.yml b/.github/workflows/_runner-siracusa.yml index 1972724e9c..f5f1c91f99 100644 --- a/.github/workflows/_runner-siracusa.yml +++ b/.github/workflows/_runner-siracusa.yml @@ -14,12 +14,10 @@ name: _runner-siracusa docker-image: required: true type: string - test-names: + test-type: required: true type: string - num-cores: - required: true - type: number + description: "Type of tests to run: kernels or models" jobs: test-runner-siracusa: @@ -41,14 +39,10 @@ jobs: key: ccache-ci - name: Run Test run: | - testNames="${{ inputs.test-names }}" cd DeeployTest mkdir -p /app/.ccache export 
CCACHE_DIR=/app/.ccache - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test: $testName" - python testRunner_siracusa.py -t Tests/$testName --cores=${{ inputs.num-cores }} - fi - done + + # Run tests using pytest markers + pytest test_platforms.py::test_siracusa_${{ inputs.test-type }} -v shell: bash diff --git a/.github/workflows/ci-platform-siracusa-tiled.yml b/.github/workflows/ci-platform-siracusa-tiled.yml index e0006ae150..ce0f71f133 100644 --- a/.github/workflows/ci-platform-siracusa-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-tiled.yml @@ -29,197 +29,107 @@ jobs: with: docker_image_deeploy: ${{ inputs.docker_image_deeploy }} - siracusa-kernels-tiled-singlebuffer-L2: + generate-matrices: + runs-on: ubuntu-latest + outputs: + l2-singlebuffer-models: ${{ steps.generate.outputs.l2-singlebuffer-models }} + l2-doublebuffer-models: ${{ steps.generate.outputs.l2-doublebuffer-models }} + l3-singlebuffer-models: ${{ steps.generate.outputs.l3-singlebuffer-models }} + l3-doublebuffer-models: ${{ steps.generate.outputs.l3-doublebuffer-models }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Generate test matrices + id: generate + run: | + chmod +x scripts/generate_test_matrix.py + echo "l2-singlebuffer-models=$(scripts/generate_test_matrix.py l2-singlebuffer-models)" >> $GITHUB_OUTPUT + echo "l2-doublebuffer-models=$(scripts/generate_test_matrix.py l2-doublebuffer-models)" >> $GITHUB_OUTPUT + echo "l3-singlebuffer-models=$(scripts/generate_test_matrix.py l3-singlebuffer-models)" >> $GITHUB_OUTPUT + echo "l3-doublebuffer-models=$(scripts/generate_test_matrix.py l3-doublebuffer-models)" >> $GITHUB_OUTPUT + + # Kernel tests - L2 singlebuffer + siracusa-kernels-tiled-l2-singlebuffer: needs: select-env - uses: ./.github/workflows/_runner-siracusa-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-tiled-kernels.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"testMatMul","L1":[64000,32000,16000]}, - {"name":"test2DRequantizedConv","L1":[8000,6000,4000]}, - {"name":"test2DRequantizedStriddedPaddedConv","L1":[600]}, - {"name":"testRequantizedDWConv","L1":[2561]}, - {"name":"iSoftmax","L1":[800,500,300]}, - {"name":"testConcat","L1":[32000,16000,8000]}, - {"name":"testRMSNorm","L1":[2048,1024,512]}, - {"name":"Hardswish","L1":[750]}, - {"name":"RQHardswish","L1":[750]}, - {"name":"testFloatGEMM","L1":[8000]}, - - {"name":"testFloat2DConvolution","L1":[1600]}, - {"name":"testFloat2DConvolutionBias","L1":[6600]}, - {"name":"testFloat2DConvolutionZeroBias","L1":[6600]}, - - {"name":"testFloat2DDWConvolution","L1":[7200]}, - {"name":"testFloat2DDWConvolutionBias","L1":[7200]}, - {"name":"testFloat2DDWConvolutionZeroBias","L1":[7200]}, + memory-level: "l2" + buffer-mode: "singlebuffer" - {"name":"testFloatLayerNorm","L1":[2000]}, - {"name":"testFloatMaxPool","L1":[2000]}, - {"name":"testFloatMatmul","L1":[2000]}, - {"name":"testFloatRelu","L1":[2000]}, - {"name":"testFloatReshapeWithSkipConnection","L1":[1400]}, - {"name":"testFloatSoftmax","L1":[4000]}, - {"name":"testFloatTranspose","L1":[2000]}, - {"name":"testFloatMul","L1":[2000]}, - {"name":"largeFloatAdd","L1":[220000]}, - {"name":"testRQGEMMwBatch","L1":[20000]}, - {"name":"testMatMulBatch","L1":[20000]} - ] - num-cores: 8 - - siracusa-kernels-tiled-doublebuffer-L2: 
+ # Kernel tests - L2 doublebuffer + siracusa-kernels-tiled-l2-doublebuffer: needs: select-env - uses: ./.github/workflows/_runner-siracusa-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-tiled-kernels.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"testMatMul","L1":[64000,32000,16000]}, - {"name":"test2DRequantizedConv","L1":[8000,6000,5000]}, - {"name":"testRequantizedDWConv","L1":[5121]}, - {"name":"iSoftmax","L1":[1600,1000,600]}, - {"name":"testConcat","L1":[64000,32000,16000]}, - {"name":"testRMSNorm","L1":[4096,2048,1024]}, - {"name":"Hardswish","L1":[750]}, - {"name":"RQHardswish","L1":[800]}, - {"name":"testFloatGEMM","L1":[8000]}, - - {"name":"testFloat2DConvolution","L1":[2000]}, - {"name":"testFloat2DConvolutionBias","L1":[8800]}, - {"name":"testFloat2DConvolutionZeroBias","L1":[8800]}, + memory-level: "l2" + buffer-mode: "doublebuffer" - {"name":"testFloat2DDWConvolution","L1":[9800]}, - {"name":"testFloat2DDWConvolutionBias","L1":[10000]}, - {"name":"testFloat2DDWConvolutionZeroBias","L1":[9800]}, - - {"name":"testFloatLayerNorm","L1":[2000]}, - {"name":"testFloatMaxPool","L1":[5000]}, - {"name":"testFloatMatmul","L1":[5000]}, - {"name":"testFloatRelu","L1":[20]}, - {"name":"testFloatReshapeWithSkipConnection","L1":[2600]}, - {"name":"testFloatSoftmax","L1":[8000]}, - {"name":"testFloatTranspose","L1":[2000]}, - {"name":"testFloatMul","L1":[2000]} - ] - num-cores: 8 - double-buffer: true + # Model tests - L2 singlebuffer + siracusa-models-tiled-l2-singlebuffer: + needs: [select-env, generate-matrices] + strategy: + fail-fast: false + matrix: + test-name: ${{ fromJSON(needs.generate-matrices.outputs.l2-singlebuffer-models) }} + uses: ./.github/workflows/_runner-siracusa-tiled-models.yml + with: + runner: ${{ needs.select-env.outputs.runner }} + docker-image: ${{ needs.select-env.outputs.image }} + test-name: ${{ matrix.test-name }} + memory-level: "l2" + buffer-mode: "singlebuffer" - siracusa-models-tiled-singlebuffer-L2: - needs: select-env + # Model tests - L2 doublebuffer + siracusa-models-tiled-l2-doublebuffer: + needs: [select-env, generate-matrices] strategy: fail-fast: false matrix: - test-data: - - name: "simpleRegression" - L1: [45000, 30000, 15000] - - name: "miniMobileNet" - L1: [60000, 12000, 6000, 3000] - - name: "miniMobileNetv2" - L1: [60000, 16000, 12000, 8000] - - name: "Attention" - L1: [60000, 10000, 5000] - - name: "microLlama/microLlama1" - L1: [60000, 10000, 5000] - - name: "microLlama/microLlama8" - L1: [60000, 10000, 5000] - - name: "microLlama/microLlama8_parallel" - L1: [60000, 10000, 5000] - - name: "MLPerf/KeywordSpotting" - L1: [64000] - - name: "MLPerf/ImageClassification" - L1: [64000] - - name: "MLPerf/AnomalyDetection" - L1: [64000] - - name: "CCT/CCT_1_16_16_8" - L1: [64000] - - name: "testFloatDemoTinyViT" - L1: [4000] - num-cores: [8] - uses: ./.github/workflows/_runner-siracusa-tiled.yml + test-name: ${{ fromJSON(needs.generate-matrices.outputs.l2-doublebuffer-models) }} + uses: ./.github/workflows/_runner-siracusa-tiled-models.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} + test-name: ${{ matrix.test-name }} + memory-level: "l2" + buffer-mode: "doublebuffer" - siracusa-models-tiled-singlebuffer-L3: - needs: select-env + # Model tests - L3 
singlebuffer + siracusa-models-tiled-l3-singlebuffer: + needs: [select-env, generate-matrices] strategy: fail-fast: false matrix: - test-data: - - name: "simpleRegression" - L1: [45000, 30000, 16000] # SCHEREMO note - - name: "miniMobileNet" - L1: [60000, 12000, 6000] # SCHEREMO note - - name: "miniMobileNetv2" - L1: [60000, 16000, 12000, 8000] - - name: "Attention" - L1: [60000, 10000, 5000, 2500] - - name: "Transformer" - L1: [60000, 30000, 15000] - - name: "microLlama/microLlama1" - L1: [60000, 10000, 5000] - - name: "CCT/CCT_2_32_32_128" - L1: [128000] - - name: "testTrainCCT/CCT2_FT2" - L1: [128000] - - name: "testFloatDemoTinyViT" - L1: [4000] - num-cores: [8] - default-memory-level: ["L3"] - uses: ./.github/workflows/_runner-siracusa-tiled.yml + test-name: ${{ fromJSON(needs.generate-matrices.outputs.l3-singlebuffer-models) }} + uses: ./.github/workflows/_runner-siracusa-tiled-models.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} - default-memory-level: ${{ matrix.default-memory-level }} + test-name: ${{ matrix.test-name }} + memory-level: "l3" + buffer-mode: "singlebuffer" - # TEMPORARILY DISABLE L3 TRANSFER DUE TO DRIVER BUG CAUSING SPORADIC CRASH - siracusa-models-tiled-doublebuffer-L3: - needs: select-env + # Model tests - L3 doublebuffer + siracusa-models-tiled-l3-doublebuffer: + needs: [select-env, generate-matrices] strategy: fail-fast: false matrix: - test-data: - - name: "simpleRegression" - L1: [60000, 45000, 30000] - - name: "miniMobileNet" - L1: [60000, 24000, 12000, 6000] - - name: "miniMobileNetv2" - L1: [60000, 32000, 24000, 16000] - - name: "Attention" - L1: [60000, 20000, 10000, 5000] - - name: "Transformer" - L1: [60000, 30000, 15000] - - name: "microLlama/microLlama1" - L1: [60000, 20000, 10000] - - name: "microLlama/microLlama8" - L1: [60000, 20000, 10000] - - name: "microLlama/microLlama8_parallel" - L1: [60000, 20000, 10000] - - name: "CCT/CCT_2_32_32_128" - L1: [128000] - - name: "testTrainCCT/CCT2_FT2" - L1: [128000] - - name: "testFloatDemoTinyViT" - L1: [4000] - num-cores: [8] - double-buffer: [true] - default-memory-level: ["L3"] - uses: ./.github/workflows/_runner-siracusa-tiled.yml + test-name: ${{ fromJSON(needs.generate-matrices.outputs.l3-doublebuffer-models) }} + uses: ./.github/workflows/_runner-siracusa-tiled-models.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} - double-buffer: ${{ matrix.double-buffer }} - default-memory-level: ${{ matrix.default-memory-level }} + test-name: ${{ matrix.test-name }} + memory-level: "l3" + buffer-mode: "doublebuffer" \ No newline at end of file diff --git a/.github/workflows/ci-platform-siracusa.yml b/.github/workflows/ci-platform-siracusa.yml index de5dab7f6b..9cec1ef896 100644 --- a/.github/workflows/ci-platform-siracusa.yml +++ b/.github/workflows/ci-platform-siracusa.yml @@ -35,49 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - MultIO - test1DPad - test2DPad - testMatMul - testMatMulAdd - testRequantizedDWConv - test2DRequantizedConv - iSoftmax - testConcat - testRMSNorm - trueIntegerDivSandwich - Hardswish - RQHardswish - testBacktracking - testFloatAdder - 
testFloatGEMM - - testFloat2DConvolution - testFloat2DConvolutionBias - testFloat2DConvolutionZeroBias - - testFloat2DDWConvolution - testFloat2DDWConvolutionBias - testFloat2DDWConvolutionZeroBias - - testFloatLayerNorm - testFloatRelu - testFloatMaxPool - testFloatMatmul - testFloatSoftmax - testFloatTranspose - testFloatMul - Quant - Dequant - testFloatReduceSum - testFloatReshapeWithSkipConnection - testFloatSoftmaxGrad - testFloatSoftmaxCrossEntropy - testFloatSoftmaxCrossEntropyGrad - QuantizedLinear - num-cores: 8 + test-type: kernels siracusa-models: needs: select-env @@ -85,15 +43,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - simpleRegression - miniMobileNet - miniMobileNetv2 - Attention - MLPerf/KeywordSpotting - MLPerf/ImageClassification - MLPerf/AnomalyDetection - CCT/CCT_1_16_16_8 - CCT/CCT_2_32_32_128_Opset20 - testFloatDemoTinyViT - num-cores: 8 + test-type: models diff --git a/DeeployTest/test_generic.py b/DeeployTest/test_generic.py deleted file mode 100644 index afb12ee1b9..0000000000 --- a/DeeployTest/test_generic.py +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -""" -Legacy test file for Generic platform (DEPRECATED). - -This file is kept for backwards compatibility but will be removed in the future. -Please use test_platforms.py instead, which supports multiple platforms. - -To run only Generic platform tests: - pytest -m generic - -To run Generic kernel tests: - pytest -m "generic and kernels" - -To run Generic model tests: - pytest -m "generic and models" -""" - -# Import all test functions from the new centralized test file -from test_platforms import test_generic_kernels, test_generic_models - -__all__ = ["test_generic_kernels", "test_generic_models"] diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index c94e63f857..d59b115a0f 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -2,13 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -""" -Central test file for all platforms. - -This file defines the test functions with markers for all supported platforms. -Each platform's test lists are imported from their respective config files. 
-""" - import pytest from testUtils.pytestRunner import create_test_config, run_and_assert_test diff --git a/DeeployTest/test_siracusa_tiled_config.py b/DeeployTest/test_siracusa_tiled_config.py index 4f69998f56..00c0d28dfb 100644 --- a/DeeployTest/test_siracusa_tiled_config.py +++ b/DeeployTest/test_siracusa_tiled_config.py @@ -80,6 +80,21 @@ "testFloatDemoTinyViT": [4000], } +L2_DOUBLEBUFFER_MODELS = { + "simpleRegression": [60000, 45000, 30000], + "miniMobileNet": [60000, 24000, 12000, 6000], + "miniMobileNetv2": [60000, 32000, 24000, 16000], + "Attention": [60000, 20000, 10000, 5000], + "microLlama/microLlama1": [60000, 20000, 10000], + "microLlama/microLlama8": [60000, 20000, 10000], + "microLlama/microLlama8_parallel": [60000, 20000, 10000], + "MLPerf/KeywordSpotting": [128000], + "MLPerf/ImageClassification": [128000], + "MLPerf/AnomalyDetection": [128000], + "CCT/CCT_1_16_16_8": [128000], + "testFloatDemoTinyViT": [8000], +} + L3_SINGLEBUFFER_MODELS = { "simpleRegression": [45000, 30000, 16000], "miniMobileNet": [60000, 12000, 6000], diff --git a/scripts/generate_test_matrix.py b/scripts/generate_test_matrix.py new file mode 100755 index 0000000000..cefea79529 --- /dev/null +++ b/scripts/generate_test_matrix.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +""" +Generate GitHub Actions test matrix from Python test configuration. + +This script reads test configurations from DeeployTest config files and outputs +JSON arrays suitable for GitHub Actions matrix strategies. +""" + +import json +import sys +from pathlib import Path + +# Add DeeployTest to path to import config +sys.path.insert(0, str(Path(__file__).parent.parent / "DeeployTest")) + +from test_siracusa_tiled_config import ( + L2_SINGLEBUFFER_MODELS, + L2_DOUBLEBUFFER_MODELS, + L3_SINGLEBUFFER_MODELS, + L3_DOUBLEBUFFER_MODELS, +) + + +def main(): + if len(sys.argv) != 2: + print("Usage: generate_test_matrix.py ", file=sys.stderr) + print("config-key must be one of:", file=sys.stderr) + print(" l2-singlebuffer-models", file=sys.stderr) + print(" l2-doublebuffer-models", file=sys.stderr) + print(" l3-singlebuffer-models", file=sys.stderr) + print(" l3-doublebuffer-models", file=sys.stderr) + sys.exit(1) + + config_key = sys.argv[1] + + # Map config keys to Python dictionaries + config_map = { + "l2-singlebuffer-models": L2_SINGLEBUFFER_MODELS, + "l2-doublebuffer-models": L2_DOUBLEBUFFER_MODELS, + "l3-singlebuffer-models": L3_SINGLEBUFFER_MODELS, + "l3-doublebuffer-models": L3_DOUBLEBUFFER_MODELS, + } + + if config_key not in config_map: + print(f"Error: Unknown config-key '{config_key}'", file=sys.stderr) + sys.exit(1) + + # Extract test names from the dictionary keys + test_dict = config_map[config_key] + test_names = list(test_dict.keys()) + + # Output as JSON array + print(json.dumps(test_names)) + + +if __name__ == "__main__": + main() From b2014b20ef2db0d984ad1c84d3e113d79acbe721 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 10:43:58 +0100 Subject: [PATCH 15/51] Add L2_DOUBLEBUFFER_MODELS to the pytest suite, add -s to debug slow test, fix generic runner --- .github/workflows/_runner-generic.yml | 2 +- .github/workflows/_runner-siracusa.yml | 2 +- DeeployTest/test_platforms.py | 32 ++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runner-generic.yml b/.github/workflows/_runner-generic.yml index f9e4a796f0..88003ae699 100644 --- a/.github/workflows/_runner-generic.yml +++ b/.github/workflows/_runner-generic.yml @@ -41,5 +41,5 @@ jobs: cd 
DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - pytest test_generic.py -v -n 4 -m "${{ inputs.pytest-marker }}" + pytest test_platform.py -v -n 4 -m "generic and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/_runner-siracusa.yml b/.github/workflows/_runner-siracusa.yml index f5f1c91f99..27ebe8de28 100644 --- a/.github/workflows/_runner-siracusa.yml +++ b/.github/workflows/_runner-siracusa.yml @@ -44,5 +44,5 @@ jobs: export CCACHE_DIR=/app/.ccache # Run tests using pytest markers - pytest test_platforms.py::test_siracusa_${{ inputs.test-type }} -v + pytest test_platforms.py::test_siracusa_${{ inputs.test-type }} -v -s shell: bash diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index d59b115a0f..161b10d56c 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -17,6 +17,7 @@ L2_SINGLEBUFFER_KERNELS, L2_DOUBLEBUFFER_KERNELS, L2_SINGLEBUFFER_MODELS, + L2_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, ) @@ -307,6 +308,37 @@ def test_siracusa_tiled_models_l2_singlebuffer( run_and_assert_test(test_name, config, skipgen, skipsim) +@pytest.mark.siracusa_tiled +@pytest.mark.models +@pytest.mark.doublebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(L2_DOUBLEBUFFER_MODELS, "L2-doublebuffer"), + ids = param_id, +) +def test_siracusa_tiled_models_l2_doublebuffer( + test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim +) -> None: + """Test Siracusa tiled model tests (L2, double-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = SIRACUSA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + @pytest.mark.siracusa_tiled @pytest.mark.models @pytest.mark.singlebuffer From 5e1e63bc1e9b91daef3602cac6ae910334400a97 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 10:48:52 +0100 Subject: [PATCH 16/51] Fix typo --- .github/workflows/_runner-generic.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_runner-generic.yml b/.github/workflows/_runner-generic.yml index 88003ae699..6681cbac96 100644 --- a/.github/workflows/_runner-generic.yml +++ b/.github/workflows/_runner-generic.yml @@ -41,5 +41,5 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - pytest test_platform.py -v -n 4 -m "generic and ${{ inputs.pytest-marker }}" + pytest test_platforms.py -v -n 4 -m "generic and ${{ inputs.pytest-marker }}" shell: bash From b3789db4f5c32064b972b52387363651f1481a02 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 11:53:28 +0100 Subject: [PATCH 17/51] Make test use common build folder among a worker to improve compilation speed --- DeeployTest/conftest.py | 8 +++++++- DeeployTest/testUtils/pytestRunner.py | 9 ++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index c671152d4f..2c105b506b 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -92,13 +92,19 @@ def toolchain_dir(request): return toolchain_install -@pytest.fixture(scope = "session") +@pytest.fixture(scope = "session", autouse = True) def 
ccache_dir(): """Setup and return ccache directory.""" + # Use existing CCACHE_DIR if already set + if "CCACHE_DIR" in os.environ: + return Path(os.environ["CCACHE_DIR"]) + + # Fall back to /app/.ccache if it exists (for CI containers) ccache_path = Path("/app/.ccache") if ccache_path.exists(): os.environ["CCACHE_DIR"] = str(ccache_path) return ccache_path + return None diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 08e7949353..324962d242 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -412,9 +412,12 @@ def create_test_config( worker_id = get_worker_id() - # VJUNG: Build dir has to be unique for each worker to prevent conflict - build_suffix = Path(gen_dir).name - build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}_{build_suffix}") + # Build directory: shared per worker, not per test (for ccache efficiency) + # Only add worker suffix for parallel execution (worker_id != "master") + if worker_id == "master": + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / "build_master") + else: + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") cmake_args_list = list(cmake_args) if cmake_args else [] if cores is not None: From f49f319bc77118dea174f75826519accf8b5eb61 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 13:31:34 +0100 Subject: [PATCH 18/51] Cleanup unused runners and increase timeout for L3 models --- .../_runner-siracusa-tiled-models.yml | 2 +- .../_runner-siracusa-tiled-sequential.yml | 72 -------------- .github/workflows/_runner-siracusa-tiled.yml | 93 ------------------- 3 files changed, 1 insertion(+), 166 deletions(-) delete mode 100644 .github/workflows/_runner-siracusa-tiled-sequential.yml delete mode 100644 .github/workflows/_runner-siracusa-tiled.yml diff --git a/.github/workflows/_runner-siracusa-tiled-models.yml b/.github/workflows/_runner-siracusa-tiled-models.yml index b0341f0eb0..10b52cae99 100644 --- a/.github/workflows/_runner-siracusa-tiled-models.yml +++ b/.github/workflows/_runner-siracusa-tiled-models.yml @@ -41,7 +41,7 @@ jobs: - name: Run model test with retry uses: nick-fields/retry@v3 with: - timeout_minutes: 10 + timeout_minutes: 20 max_attempts: 3 retry_on: error command: | diff --git a/.github/workflows/_runner-siracusa-tiled-sequential.yml b/.github/workflows/_runner-siracusa-tiled-sequential.yml deleted file mode 100644 index 18a6ef8e0a..0000000000 --- a/.github/workflows/_runner-siracusa-tiled-sequential.yml +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - ---- -name: _runner-siracusa-tiled - -"on": - workflow_call: - inputs: - runner: - required: true - type: string - docker-image: - required: true - type: string - tests-config: - required: true - type: string - num-cores: - required: false - default: 8 - type: number - default-memory-level: - required: false - default: "L2" - type: string - double-buffer: - required: false - default: false - type: boolean - memory-allocation-strategy: - required: false - default: "MiniMalloc" - type: string - search-strategy: - required: false - default: "random-max" - type: string - -jobs: - test-runner-siracusa-tiled: - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.docker-image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - 
shell: bash - run: pip install -e . - - name: Cache ccache - uses: actions/cache/restore@v4 - with: - path: /app/.ccache - key: ccache-ci - - name: Run Tests - run: | - cd DeeployTest - mkdir -p /app/.ccache - export CCACHE_DIR=/app/.ccache - - # Determine buffer mode and memory level for pytest markers - BUFFER_MARKER="${{ inputs.double-buffer && 'doublebuffer' || 'singlebuffer' }}" - MEMLEVEL_MARKER="${{ inputs.default-memory-level == 'L3' && 'l3' || 'l2' }}" - - # Run all kernel tests matching the buffer and memory level configuration - echo "Running Siracusa tiled kernel tests (${MEMLEVEL_MARKER}, ${BUFFER_MARKER})" - pytest test_platforms.py::test_siracusa_tiled_kernels_${MEMLEVEL_MARKER}_${BUFFER_MARKER} -v - shell: bash diff --git a/.github/workflows/_runner-siracusa-tiled.yml b/.github/workflows/_runner-siracusa-tiled.yml deleted file mode 100644 index ed8285273f..0000000000 --- a/.github/workflows/_runner-siracusa-tiled.yml +++ /dev/null @@ -1,93 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - ---- -name: _runner-siracusa-tiled - -"on": - workflow_call: - inputs: - runner: - required: true - type: string - docker-image: - required: true - type: string - test-name: - required: true - type: string - num-cores: - required: false - default: 8 - type: number - L1: - required: false - default: "[64000]" - type: string - default-memory-level: - required: false - default: "L2" - type: string - double-buffer: - required: false - default: false - type: boolean - memory-allocation-strategy: - required: false - default: "MiniMalloc" - type: string - search-strategy: - required: false - default: "random-max" - type: string - -jobs: - test-runner-siracusa-tiled: - strategy: - fail-fast: false - matrix: - L1: ${{ fromJSON(inputs.L1) }} - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.docker-image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . 
- - name: Cache ccache - uses: actions/cache/restore@v4 - with: - path: /app/.ccache - key: ccache-ci - - name: Run Test - uses: nick-fields/retry@v3 - with: - timeout_minutes: 15 - max_attempts: 3 - retry_on: timeout - command: | - cd DeeployTest - mkdir -p /app/.ccache - export CCACHE_DIR=/app/.ccache - - # Determine buffer mode and memory level for pytest markers - BUFFER_MARKER="${{ inputs.double-buffer && 'doublebuffer' || 'singlebuffer' }}" - MEMLEVEL_MARKER="${{ inputs.default-memory-level == 'L3' && 'l3' || 'l2' }}" - - # Determine if it's a kernel or model test - TEST_TYPE="kernels" - if [[ "${{ inputs.test-name }}" == *"/"* ]] || [[ "${{ inputs.test-name }}" =~ (simpleRegression|MobileNet|Attention|Transformer|Llama|MLPerf|CCT|TinyViT) ]]; then - TEST_TYPE="models" - fi - - # Build test ID pattern: testname-L1value-config - TEST_PATTERN="${{ inputs.test-name }}-${{ matrix.L1 }}-${{ inputs.default-memory-level }}-${BUFFER_MARKER}" - - # Run pytest with specific test matching the pattern - pytest test_platforms.py::test_siracusa_tiled_${TEST_TYPE}_${MEMLEVEL_MARKER}_${BUFFER_MARKER} -k "$TEST_PATTERN" -v - shell: bash From 262a64bcc3b7e6eab970f0baca90bb63733e1979 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 19 Dec 2025 13:57:12 +0100 Subject: [PATCH 19/51] format and lint --- .../_runner-siracusa-tiled-kernels.yml | 5 ++ .../_runner-siracusa-tiled-models.yml | 5 ++ .github/workflows/_runner-siracusa.yml | 2 +- .../workflows/ci-platform-siracusa-tiled.yml | 2 +- Deeploy/TilingExtension/TilerExtension.py | 12 +++-- DeeployTest/conftest.py | 4 +- DeeployTest/testMVP.py | 4 +- DeeployTest/testUtils/pytestRunner.py | 6 +-- DeeployTest/test_cortexm_config.py | 1 - DeeployTest/test_generic_config.py | 1 - DeeployTest/test_platforms.py | 50 +++++++------------ DeeployTest/test_siracusa_config.py | 1 - scripts/generate_test_matrix.py | 25 +++++----- 13 files changed, 57 insertions(+), 61 deletions(-) diff --git a/.github/workflows/_runner-siracusa-tiled-kernels.yml b/.github/workflows/_runner-siracusa-tiled-kernels.yml index 08288e84d4..118807035d 100644 --- a/.github/workflows/_runner-siracusa-tiled-kernels.yml +++ b/.github/workflows/_runner-siracusa-tiled-kernels.yml @@ -1,3 +1,8 @@ +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +--- name: Siracusa Tiled Kernels Runner on: diff --git a/.github/workflows/_runner-siracusa-tiled-models.yml b/.github/workflows/_runner-siracusa-tiled-models.yml index 10b52cae99..65b1132ad9 100644 --- a/.github/workflows/_runner-siracusa-tiled-models.yml +++ b/.github/workflows/_runner-siracusa-tiled-models.yml @@ -1,3 +1,8 @@ +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +--- name: Siracusa Tiled Models Runner on: diff --git a/.github/workflows/_runner-siracusa.yml b/.github/workflows/_runner-siracusa.yml index 27ebe8de28..eb9a098b5f 100644 --- a/.github/workflows/_runner-siracusa.yml +++ b/.github/workflows/_runner-siracusa.yml @@ -42,7 +42,7 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - + # Run tests using pytest markers pytest test_platforms.py::test_siracusa_${{ inputs.test-type }} -v -s shell: bash diff --git a/.github/workflows/ci-platform-siracusa-tiled.yml b/.github/workflows/ci-platform-siracusa-tiled.yml index ce0f71f133..4094092696 100644 --- a/.github/workflows/ci-platform-siracusa-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-tiled.yml @@ -132,4 +132,4 @@ jobs: 
docker-image: ${{ needs.select-env.outputs.image }} test-name: ${{ matrix.test-name }} memory-level: "l3" - buffer-mode: "doublebuffer" \ No newline at end of file + buffer-mode: "doublebuffer" diff --git a/Deeploy/TilingExtension/TilerExtension.py b/Deeploy/TilingExtension/TilerExtension.py index b4fb4a626c..9b48d9456c 100644 --- a/Deeploy/TilingExtension/TilerExtension.py +++ b/Deeploy/TilingExtension/TilerExtension.py @@ -73,7 +73,7 @@ def __init__(self, memoryHierarchy: MemoryHierarchy, testName: Optional[str] = N else: minimalloc_base = self._MINIMALLOC_INPUT_FILENAME minimalloc_output_base = self._MINIMALLOC_OUTPUT_FILENAME - + if testName is not None: # VJUNG: Sanitize path safe_test_name = testName.replace("/", "_").replace("\\", "_") @@ -289,8 +289,8 @@ def minimalloc(self, memoryMap, ctxt, nodeMemoryConstraint, capacity: int, memor raise KeyError("MINIMALLOC_INSTALL_DIR symbol not found!") minimallocOutput = subprocess.run([ - f"{minimallocInstallDir}/minimalloc", f"--capacity={capacity}", - f"--input={self._minimalloc_input}.csv", f"--output={self._minimalloc_output}.csv" + f"{minimallocInstallDir}/minimalloc", f"--capacity={capacity}", f"--input={self._minimalloc_input}.csv", + f"--output={self._minimalloc_output}.csv" ], capture_output = True, text = True) @@ -961,7 +961,11 @@ def testMemoryMapCorrectness(self, memoryMap: Dict[str, List[List[MemoryBlock]]] class TilerDeployerWrapper(NetworkDeployerWrapper): - def __init__(self, deployer: Union[MemoryLevelAwareDeployer, MemoryDeployerWrapper], tilerCls: Type[Tiler] = Tiler, testName: Optional[str] = None, workDir: Optional[str] = None): + def __init__(self, + deployer: Union[MemoryLevelAwareDeployer, MemoryDeployerWrapper], + tilerCls: Type[Tiler] = Tiler, + testName: Optional[str] = None, + workDir: Optional[str] = None): super().__init__(deployer) assert isinstance(self.Platform, (MemoryPlatform, MemoryPlatformWrapper)), \ f"Platform should be a MemoryPlatform or MemoryPlatformWrapper! 
Got {type(self.Platform).__name__}" diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 2c105b506b..1198a5ab34 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -98,13 +98,13 @@ def ccache_dir(): # Use existing CCACHE_DIR if already set if "CCACHE_DIR" in os.environ: return Path(os.environ["CCACHE_DIR"]) - + # Fall back to /app/.ccache if it exists (for CI containers) ccache_path = Path("/app/.ccache") if ccache_path.exists(): os.environ["CCACHE_DIR"] = str(ccache_path) return ccache_path - + return None diff --git a/DeeployTest/testMVP.py b/DeeployTest/testMVP.py index 5c81be8a06..01216984af 100644 --- a/DeeployTest/testMVP.py +++ b/DeeployTest/testMVP.py @@ -3,9 +3,9 @@ # SPDX-License-Identifier: Apache-2.0 import argparse +import hashlib import os import sys -import hashlib from collections import OrderedDict from typing import List, Tuple @@ -119,7 +119,7 @@ def setupDeployer(graph: gs.Graph, memoryHierarchy: MemoryHierarchy, defaultTarg # VJUNG: Create unique ID for the IO files of minimalloc and prevent conflict in case of parallel execution unique_params = f"{args.dumpdir}_L1{args.l1}_L2{args.l2}_{args.defaultMemLevel}_DB{args.doublebuffer}" testIdentifier = hashlib.md5(unique_params.encode()).hexdigest()[:16] - + if args.doublebuffer: assert args.defaultMemLevel in ["L3", "L2"] if args.defaultMemLevel == "L3": diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 324962d242..f9a5cdbf68 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -411,7 +411,7 @@ def create_test_config( gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) worker_id = get_worker_id() - + # Build directory: shared per worker, not per test (for ccache efficiency) # Only add worker suffix for parallel execution (worker_id != "master") if worker_id == "master": @@ -424,10 +424,10 @@ def create_test_config( cmake_args_list.append(f"NUM_CORES={cores}") gen_args_list = [] - + if cores is not None and platform in ["Siracusa", "Siracusa_w_neureka"]: gen_args_list.append(f"--cores={cores}") - + if tiling: if l1 is not None: gen_args_list.append(f"--l1={l1}") diff --git a/DeeployTest/test_cortexm_config.py b/DeeployTest/test_cortexm_config.py index c5b2b14103..1de427bcf8 100644 --- a/DeeployTest/test_cortexm_config.py +++ b/DeeployTest/test_cortexm_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Cortex-M (QEMU-ARM) platform.""" KERNEL_TESTS = [ diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index fa7f525550..c99e961c6e 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Generic platform.""" KERNEL_TESTS = [ diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 161b10d56c..d7e0819c4f 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -3,24 +3,17 @@ # SPDX-License-Identifier: Apache-2.0 import pytest -from testUtils.pytestRunner import create_test_config, run_and_assert_test - +from test_cortexm_config import KERNEL_TESTS as CORTEXM_KERNEL_TESTS +from test_cortexm_config import MODEL_TESTS as CORTEXM_MODEL_TESTS # Import platform-specific test 
configurations from test_generic_config import KERNEL_TESTS as GENERIC_KERNEL_TESTS from test_generic_config import MODEL_TESTS as GENERIC_MODEL_TESTS -from test_cortexm_config import KERNEL_TESTS as CORTEXM_KERNEL_TESTS -from test_cortexm_config import MODEL_TESTS as CORTEXM_MODEL_TESTS +from test_siracusa_config import DEFAULT_CORES as SIRACUSA_DEFAULT_CORES from test_siracusa_config import KERNEL_TESTS as SIRACUSA_KERNEL_TESTS from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS -from test_siracusa_config import DEFAULT_CORES as SIRACUSA_DEFAULT_CORES -from test_siracusa_tiled_config import ( - L2_SINGLEBUFFER_KERNELS, - L2_DOUBLEBUFFER_KERNELS, - L2_SINGLEBUFFER_MODELS, - L2_DOUBLEBUFFER_MODELS, - L3_SINGLEBUFFER_MODELS, - L3_DOUBLEBUFFER_MODELS, -) +from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ + L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS +from testUtils.pytestRunner import create_test_config, run_and_assert_test def generate_test_params(test_dict, config_name): @@ -63,7 +56,6 @@ def param_id(param): }, } - ### Markers summary ### # Platform markers: # generic: tests from the generic platform @@ -209,9 +201,8 @@ def test_siracusa_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, generate_test_params(L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), ids = param_id, ) -def test_siracusa_tiled_kernels_l2_singlebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled kernel tests (L2, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( @@ -255,9 +246,8 @@ def test_siracusa_tiled_kernels_l2_singlebuffer( generate_test_params(L2_DOUBLEBUFFER_KERNELS, "L2-doublebuffer"), ids = param_id, ) -def test_siracusa_tiled_kernels_l2_doublebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled kernel tests (L2, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( @@ -286,9 +276,8 @@ def test_siracusa_tiled_kernels_l2_doublebuffer( generate_test_params(L2_SINGLEBUFFER_MODELS, "L2-singlebuffer"), ids = param_id, ) -def test_siracusa_tiled_models_l2_singlebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_models_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled model tests (L2, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( @@ -317,9 +306,8 @@ def test_siracusa_tiled_models_l2_singlebuffer( generate_test_params(L2_DOUBLEBUFFER_MODELS, "L2-doublebuffer"), ids = param_id, ) -def test_siracusa_tiled_models_l2_doublebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_models_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled model tests (L2, double-buffer).""" test_name, l1, config_name = test_params config = 
create_test_config( @@ -348,9 +336,8 @@ def test_siracusa_tiled_models_l2_doublebuffer( generate_test_params(L3_SINGLEBUFFER_MODELS, "L3-singlebuffer"), ids = param_id, ) -def test_siracusa_tiled_models_l3_singlebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_models_l3_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled model tests (L3, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( @@ -379,9 +366,8 @@ def test_siracusa_tiled_models_l3_singlebuffer( generate_test_params(L3_DOUBLEBUFFER_MODELS, "L3-doublebuffer"), ids = param_id, ) -def test_siracusa_tiled_models_l3_doublebuffer( - test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim -) -> None: +def test_siracusa_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: """Test Siracusa tiled model tests (L3, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( diff --git a/DeeployTest/test_siracusa_config.py b/DeeployTest/test_siracusa_config.py index 371cb273e3..b1f86af97e 100644 --- a/DeeployTest/test_siracusa_config.py +++ b/DeeployTest/test_siracusa_config.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 - PLATFORM_NAME = "Siracusa" SIMULATOR = "gvsoc" DEFAULT_CORES = 8 diff --git a/scripts/generate_test_matrix.py b/scripts/generate_test_matrix.py index cefea79529..c25fe534f0 100755 --- a/scripts/generate_test_matrix.py +++ b/scripts/generate_test_matrix.py @@ -1,4 +1,7 @@ #!/usr/bin/env python3 +# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 """ Generate GitHub Actions test matrix from Python test configuration. 
@@ -13,22 +16,18 @@ # Add DeeployTest to path to import config sys.path.insert(0, str(Path(__file__).parent.parent / "DeeployTest")) -from test_siracusa_tiled_config import ( - L2_SINGLEBUFFER_MODELS, - L2_DOUBLEBUFFER_MODELS, - L3_SINGLEBUFFER_MODELS, - L3_DOUBLEBUFFER_MODELS, -) +from test_siracusa_tiled_config import L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, \ + L3_SINGLEBUFFER_MODELS def main(): if len(sys.argv) != 2: - print("Usage: generate_test_matrix.py ", file=sys.stderr) - print("config-key must be one of:", file=sys.stderr) - print(" l2-singlebuffer-models", file=sys.stderr) - print(" l2-doublebuffer-models", file=sys.stderr) - print(" l3-singlebuffer-models", file=sys.stderr) - print(" l3-doublebuffer-models", file=sys.stderr) + print("Usage: generate_test_matrix.py ", file = sys.stderr) + print("config-key must be one of:", file = sys.stderr) + print(" l2-singlebuffer-models", file = sys.stderr) + print(" l2-doublebuffer-models", file = sys.stderr) + print(" l3-singlebuffer-models", file = sys.stderr) + print(" l3-doublebuffer-models", file = sys.stderr) sys.exit(1) config_key = sys.argv[1] @@ -42,7 +41,7 @@ def main(): } if config_key not in config_map: - print(f"Error: Unknown config-key '{config_key}'", file=sys.stderr) + print(f"Error: Unknown config-key '{config_key}'", file = sys.stderr) sys.exit(1) # Extract test names from the dictionary keys From 70f1a19778b38be9552e4b7a1faee1e60391ddb0 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 5 Jan 2026 15:57:05 +0100 Subject: [PATCH 20/51] Migrate Mempool tests to PyTest --- .github/workflows/_runner-mempool.yml | 12 ++--- .github/workflows/ci-platform-mempool.yml | 31 +----------- DeeployTest/conftest.py | 1 + DeeployTest/test_mempool_config.py | 47 ++++++++++++++++++ DeeployTest/test_platforms.py | 58 +++++++++++++++++++++-- 5 files changed, 107 insertions(+), 42 deletions(-) create mode 100644 DeeployTest/test_mempool_config.py diff --git a/.github/workflows/_runner-mempool.yml b/.github/workflows/_runner-mempool.yml index edd048ba9d..deb4809330 100644 --- a/.github/workflows/_runner-mempool.yml +++ b/.github/workflows/_runner-mempool.yml @@ -14,7 +14,7 @@ name: _runner-mempool docker-image: required: true type: string - test-names: + pytest-marker: required: true type: string @@ -36,16 +36,10 @@ jobs: with: path: /app/.ccache key: ccache-ci - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. 
run: | - testNames="${{ inputs.test-names }}" cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test: $testName" - python testRunner_mempool.py -t Tests/$testName - fi - done + pytest test_platforms.py -v -n 4 -m "mempool and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/ci-platform-mempool.yml b/.github/workflows/ci-platform-mempool.yml index f7394c04da..efda508257 100644 --- a/.github/workflows/ci-platform-mempool.yml +++ b/.github/workflows/ci-platform-mempool.yml @@ -35,27 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - MultIO - test1DConvolution - test2DConvolution - test1DDWConvolution - test2DDWConvolution - test1DPad - test2DPad - testGEMM - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testRQGEMM - testRQMatMul - testReduceSum - testReduceMean - testSlice - testRequantizedDWConv - test2DRequantizedConv + pytest-marker: "kernels" mempool-models: needs: select-env @@ -63,11 +43,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - simpleRegression - simpleCNN - ICCT - ICCT_ITA - ICCT_8 - miniMobileNet - miniMobileNetv2 + pytest-marker: "models" diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 1198a5ab34..5628a6b052 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -51,6 +51,7 @@ def pytest_configure(config: pytest.Config) -> None: # Register custom markers config.addinivalue_line("markers", "generic: mark test as a Generic platform test") config.addinivalue_line("markers", "cortexm: mark test as a Cortex-M (QEMU-ARM) platform test") + config.addinivalue_line("markers", "mempool: mark test as a MemPool platform test") config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") diff --git a/DeeployTest/test_mempool_config.py b/DeeployTest/test_mempool_config.py new file mode 100644 index 0000000000..12e85578af --- /dev/null +++ b/DeeployTest/test_mempool_config.py @@ -0,0 +1,47 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +""" +Test configuration for MemPool platform. + +This module defines the test lists and default parameters for MemPool platform tests. 
+""" + +# Default number of threads for MemPool +DEFAULT_NUM_THREADS = 16 + +# Kernel tests (individual operators) +KERNEL_TESTS = [ + "Adder", + "MultIO", + "test1DConvolution", + "test2DConvolution", + "test1DDWConvolution", + "test2DDWConvolution", + "test1DPad", + "test2DPad", + "testGEMM", + "testMatMul", + "testMatMulAdd", + "testMaxPool", + "testRQConv", + "testRQGEMM", + "testRQMatMul", + "testReduceSum", + "testReduceMean", + "testSlice", + "testRequantizedDWConv", + "test2DRequantizedConv", +] + +# Model tests (full networks) +MODEL_TESTS = [ + "simpleRegression", + "simpleCNN", + "ICCT", + "ICCT_ITA", + "ICCT_8", + "miniMobileNet", + "miniMobileNetv2", +] diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index d7e0819c4f..c7f3937a53 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -8,6 +8,9 @@ # Import platform-specific test configurations from test_generic_config import KERNEL_TESTS as GENERIC_KERNEL_TESTS from test_generic_config import MODEL_TESTS as GENERIC_MODEL_TESTS +from test_mempool_config import DEFAULT_NUM_THREADS as MEMPOOL_DEFAULT_NUM_THREADS +from test_mempool_config import KERNEL_TESTS as MEMPOOL_KERNEL_TESTS +from test_mempool_config import MODEL_TESTS as MEMPOOL_MODEL_TESTS from test_siracusa_config import DEFAULT_CORES as SIRACUSA_DEFAULT_CORES from test_siracusa_config import KERNEL_TESTS as SIRACUSA_KERNEL_TESTS from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS @@ -54,6 +57,13 @@ def param_id(param): "kernel_tests": CORTEXM_KERNEL_TESTS, "model_tests": CORTEXM_MODEL_TESTS, }, + "mempool": { + "platform": "MemPool", + "simulator": "banshee", + "kernel_tests": MEMPOOL_KERNEL_TESTS, + "model_tests": MEMPOOL_MODEL_TESTS, + "default_num_threads": MEMPOOL_DEFAULT_NUM_THREADS, + }, } ### Markers summary ### @@ -148,7 +158,50 @@ def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c run_and_assert_test(test_name, config, skipgen, skipsim) -### Siracusa Platform Tests ### +@pytest.mark.mempool +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", MEMPOOL_KERNEL_TESTS, ids = MEMPOOL_KERNEL_TESTS) +def test_mempool_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test MemPool platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["mempool"] + + # Add MemPool-specific CMake args for number of threads + mempool_cmake_args = cmake_args + [f"num_threads={platform_config['default_num_threads']}"] + + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = mempool_cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.mempool +@pytest.mark.models +@pytest.mark.parametrize("test_name", MEMPOOL_MODEL_TESTS, ids = MEMPOOL_MODEL_TESTS) +def test_mempool_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test MemPool platform model tests.""" + platform_config = PLATFORM_CONFIGS["mempool"] + + # Add MemPool-specific CMake args for number of threads + mempool_cmake_args = cmake_args + [f"num_threads={platform_config['default_num_threads']}"] + + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = 
deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = mempool_cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) @pytest.mark.siracusa @@ -189,9 +242,6 @@ def test_siracusa_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, run_and_assert_test(test_name, config, skipgen, skipsim) -### Siracusa Tiled Platform Tests ### - - @pytest.mark.siracusa_tiled @pytest.mark.kernels @pytest.mark.singlebuffer From 694f4f9b0eed73d8e90da2aac0b2a922d0cb93fd Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 5 Jan 2026 16:22:12 +0100 Subject: [PATCH 21/51] Migrate Snitch, Chimera, and SoftHier tests to PyTest --- .github/workflows/_runner-chimera.yml | 20 +--- .github/workflows/_runner-snitch.yml | 23 +---- .github/workflows/_runner-softhier.yml | 14 +-- .github/workflows/ci-platform-chimera.yml | 5 +- .github/workflows/ci-platform-snitch.yml | 15 +-- .github/workflows/ci-platform-softhier.yml | 3 +- DeeployTest/conftest.py | 3 + DeeployTest/test_chimera_config.py | 14 +++ DeeployTest/test_platforms.py | 101 ++++++++++++++++++++- DeeployTest/test_snitch_config.py | 25 +++++ DeeployTest/test_softhier_config.py | 16 ++++ 11 files changed, 172 insertions(+), 67 deletions(-) create mode 100644 DeeployTest/test_chimera_config.py create mode 100644 DeeployTest/test_snitch_config.py create mode 100644 DeeployTest/test_softhier_config.py diff --git a/.github/workflows/_runner-chimera.yml b/.github/workflows/_runner-chimera.yml index fbfe4480f1..14e80631d1 100644 --- a/.github/workflows/_runner-chimera.yml +++ b/.github/workflows/_runner-chimera.yml @@ -14,10 +14,7 @@ name: _runner-chimera docker-image: required: true type: string - test-names: - required: true - type: string - simulators: + pytest-marker: required: true type: string @@ -39,22 +36,11 @@ jobs: with: path: /app/.ccache key: ccache-ci - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. run: | - testNames="${{ inputs.test-names }}" - simulators="${{inputs.simulators}}" cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache export CHIMERA_SDK_HOME=/app/install/chimera-sdk - echo "$simulators" | while IFS= read -r simulator; do - if [[ -n "$simulator" ]]; then - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test $testName using $simulator" - python testRunner_chimera.py -t Tests/$testName --simulator=$simulator - fi - done - fi - done + pytest test_platforms.py -v -n 4 -m "chimera and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/_runner-snitch.yml b/.github/workflows/_runner-snitch.yml index ab0ae55ed7..bc599e4fe7 100644 --- a/.github/workflows/_runner-snitch.yml +++ b/.github/workflows/_runner-snitch.yml @@ -14,13 +14,7 @@ name: _runner-snitch docker-image: required: true type: string - test-names: - required: true - type: string - num-cores: - required: true - type: number - simulators: + pytest-marker: required: true type: string @@ -42,21 +36,10 @@ jobs: with: path: /app/.ccache key: ccache-ci - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. 
run: | - testNames="${{ inputs.test-names }}" - simulators="${{inputs.simulators}}" cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - echo "$simulators" | while IFS= read -r simulator; do - if [[ -n "$simulator" ]]; then - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test $testName using $simulator" - python testRunner_snitch.py -t Tests/$testName --simulator=$simulator --cores=${{ inputs.num-cores }} - fi - done - fi - done + pytest test_platforms.py -v -n 4 -m "snitch and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/_runner-softhier.yml b/.github/workflows/_runner-softhier.yml index e06aea3b9b..b448cd7505 100644 --- a/.github/workflows/_runner-softhier.yml +++ b/.github/workflows/_runner-softhier.yml @@ -14,7 +14,7 @@ name: _runner-softhier docker-image: required: true type: string - test-names: + pytest-marker: required: true type: string @@ -31,15 +31,11 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. run: | - testNames="${{ inputs.test-names }}" export SOFTHIER_INSTALL_DIR=/app/install/softhier cd DeeployTest - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test: $testName" - python testRunner_softhier.py -t Tests/$testName --toolchain=GCC - fi - done + mkdir -p /app/.ccache + export CCACHE_DIR=/app/.ccache + pytest test_platforms.py -v -n 4 -m "softhier and ${{ inputs.pytest-marker }}" --toolchain=GCC shell: bash diff --git a/.github/workflows/ci-platform-chimera.yml b/.github/workflows/ci-platform-chimera.yml index 79db97abd0..aad065ae78 100644 --- a/.github/workflows/ci-platform-chimera.yml +++ b/.github/workflows/ci-platform-chimera.yml @@ -35,7 +35,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - simulators: | - gvsoc + pytest-marker: "kernels" diff --git a/.github/workflows/ci-platform-snitch.yml b/.github/workflows/ci-platform-snitch.yml index 3968ba3201..c1ae694148 100644 --- a/.github/workflows/ci-platform-snitch.yml +++ b/.github/workflows/ci-platform-snitch.yml @@ -35,17 +35,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - iSoftmax - TestiNoNorm - TestAdderLarge - TestiSoftmaxLarge - testMatMul - testRQGEMM - TestRQAdd - testRQGEMMTransB - testFloatSoftmax - num-cores: 9 - simulators: | - gvsoc + pytest-marker: "kernels" diff --git a/.github/workflows/ci-platform-softhier.yml b/.github/workflows/ci-platform-softhier.yml index 959dca131b..28a85160be 100644 --- a/.github/workflows/ci-platform-softhier.yml +++ b/.github/workflows/ci-platform-softhier.yml @@ -35,5 +35,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder + pytest-marker: "kernels" diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 5628a6b052..6d0605cfd6 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -52,6 +52,9 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "generic: mark test as a Generic platform test") config.addinivalue_line("markers", "cortexm: mark test as a Cortex-M (QEMU-ARM) platform test") config.addinivalue_line("markers", "mempool: mark test as a MemPool platform 
test") + config.addinivalue_line("markers", "chimera: mark test as a Chimera platform test") + config.addinivalue_line("markers", "softhier: mark test as a SoftHier platform test") + config.addinivalue_line("markers", "snitch: mark test as a Snitch platform test") config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") diff --git a/DeeployTest/test_chimera_config.py b/DeeployTest/test_chimera_config.py new file mode 100644 index 0000000000..aa27b5e22e --- /dev/null +++ b/DeeployTest/test_chimera_config.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Chimera platform.""" + +# Chimera platform uses gvsoc simulator +# Currently only Adder test is in CI + +KERNEL_TESTS = [ + "Adder", +] + +MODEL_TESTS = [] diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index c7f3937a53..8a2b042578 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -3,9 +3,12 @@ # SPDX-License-Identifier: Apache-2.0 import pytest + +# Import platform-specific test configurations +from test_chimera_config import KERNEL_TESTS as CHIMERA_KERNEL_TESTS +from test_chimera_config import MODEL_TESTS as CHIMERA_MODEL_TESTS from test_cortexm_config import KERNEL_TESTS as CORTEXM_KERNEL_TESTS from test_cortexm_config import MODEL_TESTS as CORTEXM_MODEL_TESTS -# Import platform-specific test configurations from test_generic_config import KERNEL_TESTS as GENERIC_KERNEL_TESTS from test_generic_config import MODEL_TESTS as GENERIC_MODEL_TESTS from test_mempool_config import DEFAULT_NUM_THREADS as MEMPOOL_DEFAULT_NUM_THREADS @@ -16,6 +19,12 @@ from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS +from test_snitch_config import DEFAULT_NUM_CORES as SNITCH_DEFAULT_NUM_CORES +from test_snitch_config import KERNEL_TESTS as SNITCH_KERNEL_TESTS +from test_snitch_config import MODEL_TESTS as SNITCH_MODEL_TESTS +from test_softhier_config import DEFAULT_NUM_CLUSTERS as SOFTHIER_DEFAULT_NUM_CLUSTERS +from test_softhier_config import KERNEL_TESTS as SOFTHIER_KERNEL_TESTS +from test_softhier_config import MODEL_TESTS as SOFTHIER_MODEL_TESTS from testUtils.pytestRunner import create_test_config, run_and_assert_test @@ -64,12 +73,36 @@ def param_id(param): "model_tests": MEMPOOL_MODEL_TESTS, "default_num_threads": MEMPOOL_DEFAULT_NUM_THREADS, }, + "chimera": { + "platform": "Chimera", + "simulator": "gvsoc", + "kernel_tests": CHIMERA_KERNEL_TESTS, + "model_tests": CHIMERA_MODEL_TESTS, + }, + "softhier": { + "platform": "SoftHier", + "simulator": "gvsoc", + "kernel_tests": SOFTHIER_KERNEL_TESTS, + "model_tests": SOFTHIER_MODEL_TESTS, + "default_num_clusters": SOFTHIER_DEFAULT_NUM_CLUSTERS, + }, + "snitch": { + "platform": "Snitch", + "simulator": "gvsoc", + "kernel_tests": SNITCH_KERNEL_TESTS, + "model_tests": SNITCH_MODEL_TESTS, + "default_num_cores": SNITCH_DEFAULT_NUM_CORES, + }, } ### Markers summary ### # Platform markers: # generic: tests from the generic platform # cortexm: tests from the cortex-m (QEMU-ARM) platform +# mempool: tests from the MemPool 
platform +# chimera: tests from the Chimera platform +# softhier: tests from the SoftHier platform +# snitch: tests from the Snitch platform # siracusa: tests from the Siracusa platform (untiled) # siracusa_tiled: tests from the Siracusa platform (tiled) # Test type markers: @@ -435,3 +468,69 @@ def test_siracusa_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, to double_buffer = True, ) run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.chimera +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", CHIMERA_KERNEL_TESTS, ids = CHIMERA_KERNEL_TESTS) +def test_chimera_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Chimera platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["chimera"] + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.softhier +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", SOFTHIER_KERNEL_TESTS, ids = SOFTHIER_KERNEL_TESTS) +def test_softhier_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test SoftHier platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["softhier"] + + # Add SoftHier-specific CMake args for number of clusters + softhier_cmake_args = cmake_args + [f"num_clusters={platform_config['default_num_clusters']}"] + + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = softhier_cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.snitch +@pytest.mark.kernels +@pytest.mark.parametrize("test_name", SNITCH_KERNEL_TESTS, ids = SNITCH_KERNEL_TESTS) +def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: + """Test Snitch platform kernel tests.""" + platform_config = PLATFORM_CONFIGS["snitch"] + + # Add Snitch-specific CMake args for number of cores + snitch_cmake_args = cmake_args + [f"NUM_CORES={platform_config['default_num_cores']}"] + + config = create_test_config( + test_name = test_name, + platform = platform_config["platform"], + simulator = platform_config["simulator"], + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = snitch_cmake_args, + tiling = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + diff --git a/DeeployTest/test_snitch_config.py b/DeeployTest/test_snitch_config.py new file mode 100644 index 0000000000..435a48acc1 --- /dev/null +++ b/DeeployTest/test_snitch_config.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Snitch platform.""" + +# Snitch platform supports gvsoc, banshee, vsim simulators +# Default configuration: 9 cores + +DEFAULT_NUM_CORES = 9 + +KERNEL_TESTS = [ + "Adder", + "iSoftmax", + "TestiNoNorm", + "TestAdderLarge", + "TestiSoftmaxLarge", + "testMatMul", + "testRQGEMM", + "TestRQAdd", + "testRQGEMMTransB", + "testFloatSoftmax", 
+] + +MODEL_TESTS = [] diff --git a/DeeployTest/test_softhier_config.py b/DeeployTest/test_softhier_config.py new file mode 100644 index 0000000000..6220c8bf8f --- /dev/null +++ b/DeeployTest/test_softhier_config.py @@ -0,0 +1,16 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for SoftHier platform.""" + +# SoftHier platform uses gvsoc simulator +# Default configuration: 1 cluster + +DEFAULT_NUM_CLUSTERS = 1 + +KERNEL_TESTS = [ + "Adder", +] + +MODEL_TESTS = [] From 432bfdf65eba30a64361e48c7fed6f6fba135c7e Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 5 Jan 2026 16:31:13 +0100 Subject: [PATCH 22/51] Format and Lint --- DeeployTest/test_chimera_config.py | 1 - DeeployTest/test_mempool_config.py | 1 - DeeployTest/test_platforms.py | 18 ++++++++---------- DeeployTest/test_snitch_config.py | 1 - DeeployTest/test_softhier_config.py | 1 - 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/DeeployTest/test_chimera_config.py b/DeeployTest/test_chimera_config.py index aa27b5e22e..1896367e89 100644 --- a/DeeployTest/test_chimera_config.py +++ b/DeeployTest/test_chimera_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Chimera platform.""" # Chimera platform uses gvsoc simulator diff --git a/DeeployTest/test_mempool_config.py b/DeeployTest/test_mempool_config.py index 12e85578af..64660f3ab3 100644 --- a/DeeployTest/test_mempool_config.py +++ b/DeeployTest/test_mempool_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """ Test configuration for MemPool platform. 
diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 8a2b042578..582bcee99b 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: Apache-2.0 import pytest - # Import platform-specific test configurations from test_chimera_config import KERNEL_TESTS as CHIMERA_KERNEL_TESTS from test_chimera_config import MODEL_TESTS as CHIMERA_MODEL_TESTS @@ -197,10 +196,10 @@ def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c def test_mempool_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: """Test MemPool platform kernel tests.""" platform_config = PLATFORM_CONFIGS["mempool"] - + # Add MemPool-specific CMake args for number of threads mempool_cmake_args = cmake_args + [f"num_threads={platform_config['default_num_threads']}"] - + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -220,10 +219,10 @@ def test_mempool_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, def test_mempool_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: """Test MemPool platform model tests.""" platform_config = PLATFORM_CONFIGS["mempool"] - + # Add MemPool-specific CMake args for number of threads mempool_cmake_args = cmake_args + [f"num_threads={platform_config['default_num_threads']}"] - + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -495,10 +494,10 @@ def test_chimera_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, def test_softhier_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: """Test SoftHier platform kernel tests.""" platform_config = PLATFORM_CONFIGS["softhier"] - + # Add SoftHier-specific CMake args for number of clusters softhier_cmake_args = cmake_args + [f"num_clusters={platform_config['default_num_clusters']}"] - + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -518,10 +517,10 @@ def test_softhier_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: """Test Snitch platform kernel tests.""" platform_config = PLATFORM_CONFIGS["snitch"] - + # Add Snitch-specific CMake args for number of cores snitch_cmake_args = cmake_args + [f"NUM_CORES={platform_config['default_num_cores']}"] - + config = create_test_config( test_name = test_name, platform = platform_config["platform"], @@ -533,4 +532,3 @@ def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, c tiling = False, ) run_and_assert_test(test_name, config, skipgen, skipsim) - diff --git a/DeeployTest/test_snitch_config.py b/DeeployTest/test_snitch_config.py index 435a48acc1..b3b1c7b776 100644 --- a/DeeployTest/test_snitch_config.py +++ b/DeeployTest/test_snitch_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Snitch platform.""" # Snitch platform supports gvsoc, banshee, vsim simulators diff --git a/DeeployTest/test_softhier_config.py b/DeeployTest/test_softhier_config.py index 6220c8bf8f..f5e03eddac 100644 --- a/DeeployTest/test_softhier_config.py +++ b/DeeployTest/test_softhier_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of 
Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for SoftHier platform.""" # SoftHier platform uses gvsoc simulator From ad13dcf835272f977b801ad898415a1c881ce095 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 5 Jan 2026 16:58:52 +0100 Subject: [PATCH 23/51] Migrate Snitch tiled tests to PyTest --- .../_runner-snitch-tiled-sequential.yml | 48 ++---------------- .github/workflows/_runner-snitch.yml | 2 +- .github/workflows/_runner-softhier.yml | 2 +- .../workflows/ci-platform-snitch-tiled.yml | 14 +----- DeeployTest/conftest.py | 1 + DeeployTest/test_platforms.py | 50 ++++++++++++++++++- DeeployTest/test_snitch_tiled_config.py | 40 +++++++++++++++ 7 files changed, 96 insertions(+), 61 deletions(-) create mode 100644 DeeployTest/test_snitch_tiled_config.py diff --git a/.github/workflows/_runner-snitch-tiled-sequential.yml b/.github/workflows/_runner-snitch-tiled-sequential.yml index 4128b13186..fbd5195b08 100644 --- a/.github/workflows/_runner-snitch-tiled-sequential.yml +++ b/.github/workflows/_runner-snitch-tiled-sequential.yml @@ -14,26 +14,7 @@ name: _runner-snitch-tiled-sequential docker-image: required: true type: string - tests-config: - required: true - type: string - num-cores: - required: false - default: 9 - type: number - default-memory-level: - required: false - default: "L2" - type: string - memory-allocation-strategy: - required: false - default: "MiniMalloc" - type: string - search-strategy: - required: false - default: "random-max" - type: string - simulators: + pytest-marker: required: true type: string @@ -50,33 +31,10 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . - - name: Install jq - run: | - export DEBIAN_FRONTEND=noninteractive - apt-get update -y - apt-get install -y jq - - name: Cache ccache - uses: actions/cache/restore@v4 - with: - path: /app/.ccache - key: ccache-ci - - name: Run Tests + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. 
run: | - simulators="${{inputs.simulators}}" cd DeeployTest - echo '${{ inputs.tests-config }}' > tests.json mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - echo "$simulators" | while IFS= read -r simulator; do - if [[ -n "$simulator" ]]; then - jq -c '.[]' tests.json | while read test; do - testName=$(echo "$test" | jq -r '.name') - L1_values=$(echo "$test" | jq -r '.L1[]') - for L1_value in $L1_values; do - echo "Running test: $testName with L1: $L1_value using $simulator" - python testRunner_tiled_snitch.py -t Tests/$testName --cores=${{ inputs.num-cores }} --simulator=$simulator --l1 $L1_value --defaultMemLevel=${{ inputs.default-memory-level }} --memAllocStrategy=${{ inputs.memory-allocation-strategy }} --searchStrategy=${{ inputs.search-strategy }} - done - done - fi - done + pytest test_platforms.py -v -n 4 -m "snitch_tiled and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/_runner-snitch.yml b/.github/workflows/_runner-snitch.yml index bc599e4fe7..624b3d3aa8 100644 --- a/.github/workflows/_runner-snitch.yml +++ b/.github/workflows/_runner-snitch.yml @@ -42,4 +42,4 @@ jobs: mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache pytest test_platforms.py -v -n 4 -m "snitch and ${{ inputs.pytest-marker }}" - shell: bash + shell: bash \ No newline at end of file diff --git a/.github/workflows/_runner-softhier.yml b/.github/workflows/_runner-softhier.yml index b448cd7505..8579380ee2 100644 --- a/.github/workflows/_runner-softhier.yml +++ b/.github/workflows/_runner-softhier.yml @@ -38,4 +38,4 @@ jobs: mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache pytest test_platforms.py -v -n 4 -m "softhier and ${{ inputs.pytest-marker }}" --toolchain=GCC - shell: bash + shell: bash \ No newline at end of file diff --git a/.github/workflows/ci-platform-snitch-tiled.yml b/.github/workflows/ci-platform-snitch-tiled.yml index 71d6a93009..5390d8ad16 100644 --- a/.github/workflows/ci-platform-snitch-tiled.yml +++ b/.github/workflows/ci-platform-snitch-tiled.yml @@ -35,16 +35,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"TestiNoNorm","L1":[5000,10000]}, - {"name":"TestAdderLarge","L1":[5000,10000]}, - {"name":"TestiSoftmaxLarge","L1":[5000,10000]}, - {"name":"testRQGEMM","L1":[2000,5000]}, - {"name":"testFloatSoftmax","L1":[2000,5000,10000]}, - {"name":"TestRQAdd","L1":[5000,10000]}, - {"name":"testFloatGEMM","L1":[2000,5000,10000]}, - {"name":"testFloatGEMMtransB","L1":[2000,5000,10000]} - ] - simulators: | - gvsoc + pytest-marker: "kernels and singlebuffer and l2" diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 6d0605cfd6..43c2d77671 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -55,6 +55,7 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "chimera: mark test as a Chimera platform test") config.addinivalue_line("markers", "softhier: mark test as a SoftHier platform test") config.addinivalue_line("markers", "snitch: mark test as a Snitch platform test") + config.addinivalue_line("markers", "snitch_tiled: mark test as a Snitch platform test (tiled)") config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") diff --git a/DeeployTest/test_platforms.py 
b/DeeployTest/test_platforms.py index 582bcee99b..cf6cd10aa8 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -21,6 +21,10 @@ from test_snitch_config import DEFAULT_NUM_CORES as SNITCH_DEFAULT_NUM_CORES from test_snitch_config import KERNEL_TESTS as SNITCH_KERNEL_TESTS from test_snitch_config import MODEL_TESTS as SNITCH_MODEL_TESTS +from test_snitch_tiled_config import L2_DOUBLEBUFFER_KERNELS as SNITCH_L2_DOUBLEBUFFER_KERNELS +from test_snitch_tiled_config import L2_DOUBLEBUFFER_MODELS as SNITCH_L2_DOUBLEBUFFER_MODELS +from test_snitch_tiled_config import L2_SINGLEBUFFER_KERNELS as SNITCH_L2_SINGLEBUFFER_KERNELS +from test_snitch_tiled_config import L2_SINGLEBUFFER_MODELS as SNITCH_L2_SINGLEBUFFER_MODELS from test_softhier_config import DEFAULT_NUM_CLUSTERS as SOFTHIER_DEFAULT_NUM_CLUSTERS from test_softhier_config import KERNEL_TESTS as SOFTHIER_KERNEL_TESTS from test_softhier_config import MODEL_TESTS as SOFTHIER_MODEL_TESTS @@ -101,7 +105,8 @@ def param_id(param): # mempool: tests from the MemPool platform # chimera: tests from the Chimera platform # softhier: tests from the SoftHier platform -# snitch: tests from the Snitch platform +# snitch: tests from the Snitch platform (untiled) +# snitch_tiled: tests from the Snitch platform (tiled) # siracusa: tests from the Siracusa platform (untiled) # siracusa_tiled: tests from the Siracusa platform (tiled) # Test type markers: @@ -532,3 +537,46 @@ def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, c tiling = False, ) run_and_assert_test(test_name, config, skipgen, skipsim) + + +### Snitch Tiled Platform Tests ### + + +def generate_test_params_snitch(test_list, config_name): + """Generate test parameters for Snitch tiled tests.""" + return [(test_name, l1, config_name) for test_name, l1 in test_list] + + +@pytest.mark.snitch_tiled +@pytest.mark.kernels +@pytest.mark.singlebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params_snitch(SNITCH_L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), + ids = param_id, +) +def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, + skipgen, skipsim) -> None: + """Test Snitch tiled kernel tests (L2, single-buffer).""" + test_name, l1, config_name = test_params + + # Add Snitch-specific CMake args + snitch_cmake_args = cmake_args + [f"NUM_CORES={SNITCH_DEFAULT_NUM_CORES}"] + + config = create_test_config( + test_name = test_name, + platform = "Snitch", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = snitch_cmake_args, + tiling = True, + cores = SNITCH_DEFAULT_NUM_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + diff --git a/DeeployTest/test_snitch_tiled_config.py b/DeeployTest/test_snitch_tiled_config.py new file mode 100644 index 0000000000..77099f55cd --- /dev/null +++ b/DeeployTest/test_snitch_tiled_config.py @@ -0,0 +1,40 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Snitch platform (tiled).""" + +# Snitch tiled platform supports gvsoc, banshee, vsim simulators +# Default configuration: 9 cores, L2 default memory level + +DEFAULT_NUM_CORES = 9 + +# L2 single-buffer tests with different L1 sizes +# Format: (test_name, L1_size) +L2_SINGLEBUFFER_KERNELS = [ + ("TestiNoNorm", 5000), + 
("TestiNoNorm", 10000), + ("TestAdderLarge", 5000), + ("TestAdderLarge", 10000), + ("TestiSoftmaxLarge", 5000), + ("TestiSoftmaxLarge", 10000), + ("testRQGEMM", 2000), + ("testRQGEMM", 5000), + ("testFloatSoftmax", 2000), + ("testFloatSoftmax", 5000), + ("testFloatSoftmax", 10000), + ("TestRQAdd", 5000), + ("TestRQAdd", 10000), + ("testFloatGEMM", 2000), + ("testFloatGEMM", 5000), + ("testFloatGEMM", 10000), + ("testFloatGEMMtransB", 2000), + ("testFloatGEMMtransB", 5000), + ("testFloatGEMMtransB", 10000), +] + +L2_SINGLEBUFFER_MODELS = [] + +# Currently no double-buffer configurations in CI +L2_DOUBLEBUFFER_KERNELS = [] +L2_DOUBLEBUFFER_MODELS = [] From 9dc75df7d2acbe77f1e8f6bfcb720dd58b3cacbd Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 5 Jan 2026 17:04:17 +0100 Subject: [PATCH 24/51] Use the pytest suite in CI for CortexM platform --- .github/workflows/_runner-cortexm.yml | 12 +++--------- .github/workflows/ci-platform-cortexm.yml | 17 ++--------------- 2 files changed, 5 insertions(+), 24 deletions(-) diff --git a/.github/workflows/_runner-cortexm.yml b/.github/workflows/_runner-cortexm.yml index 70ee5b5d45..3fbdf0ee16 100644 --- a/.github/workflows/_runner-cortexm.yml +++ b/.github/workflows/_runner-cortexm.yml @@ -14,7 +14,7 @@ name: _runner-cortexm docker-image: required: true type: string - test-names: + pytest-marker: required: true type: string @@ -36,16 +36,10 @@ jobs: with: path: /app/.ccache key: ccache-ci - - name: Run Test + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. run: | - testNames="${{ inputs.test-names }}" cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - echo "$testNames" | while IFS= read -r testName; do - if [[ -n "$testName" ]]; then - echo "Running test: $testName" - python testRunner_cortexm.py -t Tests/$testName - fi - done + pytest test_platforms.py -v -n 4 -m "cortexm and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/ci-platform-cortexm.yml b/.github/workflows/ci-platform-cortexm.yml index f9020f3646..0e03e17d0b 100644 --- a/.github/workflows/ci-platform-cortexm.yml +++ b/.github/workflows/ci-platform-cortexm.yml @@ -35,18 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - Adder - MultIO - test1DPad - test2DPad - testMatMul - testMatMulAdd - testMaxPool - testRQConv - testReduceSum - testReduceMean - testSlice + pytest-marker: "kernels" cortexm-models: needs: select-env @@ -54,6 +43,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-names: | - simpleRegression - WaveFormer + pytest-marker: "models" From d3f73d46508d00a01896994f1768d32bb917f0d9 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 10:47:42 +0100 Subject: [PATCH 25/51] Add Siracusa Tiled with Neureka to the pytest suite --- ...nner-siracusa-neureka-tiled-sequential.yml | 50 +---- .../ci-platform-siracusa-neureka-tiled.yml | 86 +------- DeeployTest/conftest.py | 2 + DeeployTest/testUtils/pytestRunner.py | 3 +- DeeployTest/test_platforms.py | 196 ++++++++++++++++++ .../test_siracusa_neureka_tiled_config.py | 58 ++++++ 6 files changed, 270 insertions(+), 125 deletions(-) create mode 100644 DeeployTest/test_siracusa_neureka_tiled_config.py diff --git a/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml b/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml index b1a8915319..b1f5f2fcb3 100644 --- 
a/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml +++ b/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml @@ -14,33 +14,9 @@ name: _runner-siracusa-neureka-tiled-sequential docker-image: required: true type: string - tests-config: + pytest-marker: required: true type: string - num-cores: - required: false - default: 8 - type: number - default-memory-level: - required: false - default: "L2" - type: string - double-buffer: - required: false - default: false - type: boolean - memory-allocation-strategy: - required: false - default: "MiniMalloc" - type: string - search-strategy: - required: false - default: "random-max" - type: string - neureka-wmem: - required: false - default: false - type: boolean jobs: test-runner-siracusa-neureka-tiled: @@ -55,30 +31,10 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . - - name: Install jq - run: | - export DEBIAN_FRONTEND=noninteractive - apt-get update -y - apt-get install -y jq - - name: Cache ccache - uses: actions/cache/restore@v4 - with: - path: /app/.ccache - key: ccache-ci - - name: Run Tests + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. run: | cd DeeployTest - echo '${{ inputs.tests-config }}' > tests.json mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - - jq -c '.[]' tests.json | while read test; do - testName=$(echo "$test" | jq -r '.name') - L1_values=$(echo "$test" | jq -r '.L1[]') - for L1_value in $L1_values; do - echo "Running test: $testName with L1: $L1_value" - python testRunner_tiled_siracusa_w_neureka.py -t Tests/$testName --cores=${{ inputs.num-cores }} --l1 $L1_value --defaultMemLevel=${{ inputs.default-memory-level }} ${{ inputs.double-buffer && '--doublebuffer' || '' }} ${{ inputs.neureka-wmem && '--neureka-wmem' || '' }} --memAllocStrategy=${{ inputs.memory-allocation-strategy }} --searchStrategy=${{ inputs.search-strategy }} - done - done - + pytest test_platforms.py -v -n 4 -m "siracusa_neureka_tiled and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml index e9f920931a..5e830a9c11 100644 --- a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml @@ -35,14 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} - ] - num-cores: 8 + pytest-marker: "kernels and singlebuffer and l2 and not wmem" siracusa-neureka-kernels-tiled-doublebuffer-L2: needs: select-env @@ -50,59 +43,23 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} - ] - num-cores: 8 - # double buffer enabled: - double-buffer: true + pytest-marker: "kernels and doublebuffer and l2 and not wmem" siracusa-neureka-models-tiled-singlebuffer-L3: needs: select-env - strategy: - fail-fast: false - matrix: - test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN: 1000 leads to non-2d transfers in L3! 
- - { name: "Attention", L1: [2500] } - - { name: "Transformer", L1: [15000] } - - { name: "microLlama/microLlama1", L1: [10000] } - num-cores: [8] - default-memory-level: ["L3"] - uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} - default-memory-level: ${{ matrix.default-memory-level }} + pytest-marker: "models and singlebuffer and l3 and not wmem" siracusa-neureka-models-tiled-doublebuffer-L3: needs: select-env - strategy: - fail-fast: false - matrix: - test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN note - - { name: "Attention", L1: [5000] } - - { name: "Transformer", L1: [30000] } - num-cores: [8] - double-buffer: [true] - default-memory-level: ["L3"] - uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} - double-buffer: ${{ matrix.double-buffer }} - default-memory-level: ${{ matrix.default-memory-level }} + pytest-marker: "models and doublebuffer and l3 and not wmem" siracusa-neureka-kernels-tiled-singlebuffer-L2-wmem: needs: select-env @@ -110,37 +67,12 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - tests-config: | - [ - {"name":"testRequantizedLinear","L1":[16000]}, - {"name":"testPointwise","L1":[32000]}, - {"name":"testPointwiseConvBNReLU","L1":[32000]}, - {"name":"testPointwiseUnsignedWeights","L1":[32000]} - ] - num-cores: 8 - neureka-wmem: true + pytest-marker: "kernels and singlebuffer and l2 and wmem" siracusa-neureka-models-tiled-doublebuffer-L3-wmem: needs: select-env - strategy: - fail-fast: false - matrix: - test-data: - - { name: "miniMobileNet", L1: [2000] } # LMACAN note - - { name: "Attention", L1: [3500] } - # - { name: "Transformer", L1: [30000] } - - { name: "microLlama/microLlama1", L1: [10000] } - num-cores: [8] - double-buffer: [true] - default-memory-level: ["L3"] - neureka-wmem: [true] - uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-data.name }} - num-cores: ${{ matrix.num-cores }} - L1: ${{ toJson(matrix.test-data.L1) }} - double-buffer: ${{ matrix.double-buffer }} - default-memory-level: ${{ matrix.default-memory-level }} - neureka-wmem: ${{ matrix.neureka-wmem }} + pytest-marker: "models and doublebuffer and l3 and wmem" diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 43c2d77671..b70324d8d5 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -58,12 +58,14 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "snitch_tiled: mark test as a Snitch platform test (tiled)") config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") + 
config.addinivalue_line("markers", "siracusa_neureka_tiled: mark test as a Siracusa + Neureka platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") config.addinivalue_line("markers", "models: mark test as a model test (full networks)") config.addinivalue_line("markers", "singlebuffer: mark test as single-buffer configuration") config.addinivalue_line("markers", "doublebuffer: mark test as double-buffer configuration") config.addinivalue_line("markers", "l2: mark test as L2 default memory level") config.addinivalue_line("markers", "l3: mark test as L3 default memory level") + config.addinivalue_line("markers", "wmem: mark test as using Neureka weight memory") config.addinivalue_line("markers", "slow: mark test as slow running") # Configure logging based on verbosity diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index f9a5cdbf68..09d19da735 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -379,6 +379,7 @@ def create_test_config( profile_tiling: bool = False, plot_mem_alloc: bool = False, randomized_mem_scheduler: bool = False, + gen_args: Optional[List[str]] = None, ) -> DeeployTestConfig: """ Create DeeployTestConfig for a specific test and platform. @@ -423,7 +424,7 @@ def create_test_config( if cores is not None: cmake_args_list.append(f"NUM_CORES={cores}") - gen_args_list = [] + gen_args_list = list(gen_args) if gen_args else [] if cores is not None and platform in ["Siracusa", "Siracusa_w_neureka"]: gen_args_list.append(f"--cores={cores}") diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index cf6cd10aa8..c3a210371e 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -18,6 +18,13 @@ from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS +from test_siracusa_neureka_tiled_config import DEFAULT_CORES as NEUREKA_DEFAULT_CORES +from test_siracusa_neureka_tiled_config import L2_DOUBLEBUFFER_KERNELS as NEUREKA_L2_DOUBLEBUFFER_KERNELS +from test_siracusa_neureka_tiled_config import L2_SINGLEBUFFER_KERNELS as NEUREKA_L2_SINGLEBUFFER_KERNELS +from test_siracusa_neureka_tiled_config import L2_SINGLEBUFFER_KERNELS_WMEM as NEUREKA_L2_SINGLEBUFFER_KERNELS_WMEM +from test_siracusa_neureka_tiled_config import L3_DOUBLEBUFFER_MODELS as NEUREKA_L3_DOUBLEBUFFER_MODELS +from test_siracusa_neureka_tiled_config import L3_DOUBLEBUFFER_MODELS_WMEM as NEUREKA_L3_DOUBLEBUFFER_MODELS_WMEM +from test_siracusa_neureka_tiled_config import L3_SINGLEBUFFER_MODELS as NEUREKA_L3_SINGLEBUFFER_MODELS from test_snitch_config import DEFAULT_NUM_CORES as SNITCH_DEFAULT_NUM_CORES from test_snitch_config import KERNEL_TESTS as SNITCH_KERNEL_TESTS from test_snitch_config import MODEL_TESTS as SNITCH_MODEL_TESTS @@ -109,6 +116,7 @@ def param_id(param): # snitch_tiled: tests from the Snitch platform (tiled) # siracusa: tests from the Siracusa platform (untiled) # siracusa_tiled: tests from the Siracusa platform (tiled) +# siracusa_neureka_tiled: tests from the Siracusa + Neureka platform (tiled) # Test type markers: # kernels: single kernel (or single layer) tests # models: full model (multiple layer) tests @@ -117,6 +125,7 @@ def param_id(param): # doublebuffer: double-buffer tests # l2: L2 default memory level # l3: L3 
default memory level +# wmem: with Neureka weight memory enabled @pytest.mark.generic @@ -580,3 +589,190 @@ def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, too ) run_and_assert_test(test_name, config, skipgen, skipsim) + +### Siracusa + Neureka Tiled Platform Tests ### + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.kernels +@pytest.mark.singlebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), + ids = param_id, +) +def test_siracusa_neureka_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled kernel tests (L2, single-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.kernels +@pytest.mark.doublebuffer +@pytest.mark.l2 +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L2_DOUBLEBUFFER_KERNELS, "L2-doublebuffer"), + ids = param_id, +) +def test_siracusa_neureka_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled kernel tests (L2, double-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.models +@pytest.mark.singlebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L3_SINGLEBUFFER_MODELS, "L3-singlebuffer"), + ids = param_id, +) +def test_siracusa_neureka_tiled_models_l3_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled model tests (L3, single-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = False, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.models +@pytest.mark.doublebuffer +@pytest.mark.l3 +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L3_DOUBLEBUFFER_MODELS, "L3-doublebuffer"), + ids = param_id, +) +def test_siracusa_neureka_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled model tests (L3, 
double-buffer).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = True, + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.kernels +@pytest.mark.singlebuffer +@pytest.mark.l2 +@pytest.mark.wmem +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L2_SINGLEBUFFER_KERNELS_WMEM, "L2-singlebuffer-wmem"), + ids = param_id, +) +def test_siracusa_neureka_tiled_kernels_l2_singlebuffer_wmem(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled kernel tests (L2, single-buffer, weight memory).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L2", + double_buffer = False, + gen_args = ["--neureka-wmem"], + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + + +@pytest.mark.siracusa_neureka_tiled +@pytest.mark.models +@pytest.mark.doublebuffer +@pytest.mark.l3 +@pytest.mark.wmem +@pytest.mark.parametrize( + "test_params", + generate_test_params(NEUREKA_L3_DOUBLEBUFFER_MODELS_WMEM, "L3-doublebuffer-wmem"), + ids = param_id, +) +def test_siracusa_neureka_tiled_models_l3_doublebuffer_wmem(test_params, deeploy_test_dir, toolchain, toolchain_dir, + cmake_args, skipgen, skipsim) -> None: + """Test Siracusa + Neureka tiled model tests (L3, double-buffer, weight memory).""" + test_name, l1, config_name = test_params + config = create_test_config( + test_name = test_name, + platform = "Siracusa_w_neureka", + simulator = "gvsoc", + deeploy_test_dir = deeploy_test_dir, + toolchain = toolchain, + toolchain_dir = toolchain_dir, + cmake_args = cmake_args, + tiling = True, + cores = NEUREKA_DEFAULT_CORES, + l1 = l1, + default_mem_level = "L3", + double_buffer = True, + gen_args = ["--neureka-wmem"], + ) + run_and_assert_test(test_name, config, skipgen, skipsim) + diff --git a/DeeployTest/test_siracusa_neureka_tiled_config.py b/DeeployTest/test_siracusa_neureka_tiled_config.py new file mode 100644 index 0000000000..14c2b2740c --- /dev/null +++ b/DeeployTest/test_siracusa_neureka_tiled_config.py @@ -0,0 +1,58 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Test configuration for Siracusa platform with Neureka accelerator (tiled).""" + +# Siracusa + Neureka platform with tiling support +# Default configuration: 8 cores, gvsoc simulator + +DEFAULT_CORES = 8 + +# L2 single-buffer kernel tests +# Format: dict of {test_name: [L1_sizes]} +L2_SINGLEBUFFER_KERNELS = { + "testRequantizedLinear": [16000], + "testPointwise": [32000], + "testPointwiseConvBNReLU": [32000], + "testPointwiseUnsignedWeights": [32000], +} + +# L2 double-buffer kernel tests +L2_DOUBLEBUFFER_KERNELS = { + "testRequantizedLinear": [16000], + "testPointwise": [32000], + "testPointwiseConvBNReLU": [32000], + "testPointwiseUnsignedWeights": [32000], +} + +# L3 
single-buffer model tests +# Format: dict of {test_name: [L1_sizes]} +L3_SINGLEBUFFER_MODELS = { + "miniMobileNet": [2000], + "Attention": [2500], + "Transformer": [15000], + "microLlama/microLlama1": [10000], +} + +# L3 double-buffer model tests +L3_DOUBLEBUFFER_MODELS = { + "miniMobileNet": [2000], + "Attention": [5000], + "Transformer": [30000], +} + +# L2 single-buffer kernel tests with weight memory (neureka-wmem) +L2_SINGLEBUFFER_KERNELS_WMEM = { + "testRequantizedLinear": [16000], + "testPointwise": [32000], + "testPointwiseConvBNReLU": [32000], + "testPointwiseUnsignedWeights": [32000], +} + +# L3 double-buffer model tests with weight memory (neureka-wmem) +L3_DOUBLEBUFFER_MODELS_WMEM = { + "miniMobileNet": [2000], + "Attention": [3500], + "microLlama/microLlama1": [10000], +} From 0f9b4048a0364c55a2f34742d44c7e036db6fd9d Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 10:51:52 +0100 Subject: [PATCH 26/51] Format and lint --- DeeployTest/conftest.py | 3 ++- DeeployTest/test_platforms.py | 12 ++++-------- DeeployTest/test_siracusa_neureka_tiled_config.py | 1 - DeeployTest/test_snitch_tiled_config.py | 1 - 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index b70324d8d5..dc200daaf8 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -58,7 +58,8 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "snitch_tiled: mark test as a Snitch platform test (tiled)") config.addinivalue_line("markers", "siracusa: mark test as a Siracusa platform test (untiled)") config.addinivalue_line("markers", "siracusa_tiled: mark test as a Siracusa platform test (tiled)") - config.addinivalue_line("markers", "siracusa_neureka_tiled: mark test as a Siracusa + Neureka platform test (tiled)") + config.addinivalue_line("markers", + "siracusa_neureka_tiled: mark test as a Siracusa + Neureka platform test (tiled)") config.addinivalue_line("markers", "kernels: mark test as a kernel test (individual operators)") config.addinivalue_line("markers", "models: mark test as a model test (full networks)") config.addinivalue_line("markers", "singlebuffer: mark test as single-buffer configuration") diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index c3a210371e..29bf235f06 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -16,8 +16,6 @@ from test_siracusa_config import DEFAULT_CORES as SIRACUSA_DEFAULT_CORES from test_siracusa_config import KERNEL_TESTS as SIRACUSA_KERNEL_TESTS from test_siracusa_config import MODEL_TESTS as SIRACUSA_MODEL_TESTS -from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ - L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS from test_siracusa_neureka_tiled_config import DEFAULT_CORES as NEUREKA_DEFAULT_CORES from test_siracusa_neureka_tiled_config import L2_DOUBLEBUFFER_KERNELS as NEUREKA_L2_DOUBLEBUFFER_KERNELS from test_siracusa_neureka_tiled_config import L2_SINGLEBUFFER_KERNELS as NEUREKA_L2_SINGLEBUFFER_KERNELS @@ -25,13 +23,12 @@ from test_siracusa_neureka_tiled_config import L3_DOUBLEBUFFER_MODELS as NEUREKA_L3_DOUBLEBUFFER_MODELS from test_siracusa_neureka_tiled_config import L3_DOUBLEBUFFER_MODELS_WMEM as NEUREKA_L3_DOUBLEBUFFER_MODELS_WMEM from test_siracusa_neureka_tiled_config import L3_SINGLEBUFFER_MODELS as NEUREKA_L3_SINGLEBUFFER_MODELS +from test_siracusa_tiled_config import L2_DOUBLEBUFFER_KERNELS, 
L2_DOUBLEBUFFER_MODELS, L2_SINGLEBUFFER_KERNELS, \ + L2_SINGLEBUFFER_MODELS, L3_DOUBLEBUFFER_MODELS, L3_SINGLEBUFFER_MODELS from test_snitch_config import DEFAULT_NUM_CORES as SNITCH_DEFAULT_NUM_CORES from test_snitch_config import KERNEL_TESTS as SNITCH_KERNEL_TESTS from test_snitch_config import MODEL_TESTS as SNITCH_MODEL_TESTS -from test_snitch_tiled_config import L2_DOUBLEBUFFER_KERNELS as SNITCH_L2_DOUBLEBUFFER_KERNELS -from test_snitch_tiled_config import L2_DOUBLEBUFFER_MODELS as SNITCH_L2_DOUBLEBUFFER_MODELS from test_snitch_tiled_config import L2_SINGLEBUFFER_KERNELS as SNITCH_L2_SINGLEBUFFER_KERNELS -from test_snitch_tiled_config import L2_SINGLEBUFFER_MODELS as SNITCH_L2_SINGLEBUFFER_MODELS from test_softhier_config import DEFAULT_NUM_CLUSTERS as SOFTHIER_DEFAULT_NUM_CLUSTERS from test_softhier_config import KERNEL_TESTS as SOFTHIER_KERNEL_TESTS from test_softhier_config import MODEL_TESTS as SOFTHIER_MODEL_TESTS @@ -569,10 +566,10 @@ def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, too skipgen, skipsim) -> None: """Test Snitch tiled kernel tests (L2, single-buffer).""" test_name, l1, config_name = test_params - + # Add Snitch-specific CMake args snitch_cmake_args = cmake_args + [f"NUM_CORES={SNITCH_DEFAULT_NUM_CORES}"] - + config = create_test_config( test_name = test_name, platform = "Snitch", @@ -775,4 +772,3 @@ def test_siracusa_neureka_tiled_models_l3_doublebuffer_wmem(test_params, deeploy gen_args = ["--neureka-wmem"], ) run_and_assert_test(test_name, config, skipgen, skipsim) - diff --git a/DeeployTest/test_siracusa_neureka_tiled_config.py b/DeeployTest/test_siracusa_neureka_tiled_config.py index 14c2b2740c..66fe52cfe3 100644 --- a/DeeployTest/test_siracusa_neureka_tiled_config.py +++ b/DeeployTest/test_siracusa_neureka_tiled_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Siracusa platform with Neureka accelerator (tiled).""" # Siracusa + Neureka platform with tiling support diff --git a/DeeployTest/test_snitch_tiled_config.py b/DeeployTest/test_snitch_tiled_config.py index 77099f55cd..8fdedf6b42 100644 --- a/DeeployTest/test_snitch_tiled_config.py +++ b/DeeployTest/test_snitch_tiled_config.py @@ -1,7 +1,6 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Test configuration for Snitch platform (tiled).""" # Snitch tiled platform supports gvsoc, banshee, vsim simulators From 50d00104453fdb6d55bf1a61d12eb7c0f9511a24 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 10:58:41 +0100 Subject: [PATCH 27/51] Fix SoftHier test runner call --- .github/workflows/_runner-softhier.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_runner-softhier.yml b/.github/workflows/_runner-softhier.yml index 8579380ee2..558c9cd825 100644 --- a/.github/workflows/_runner-softhier.yml +++ b/.github/workflows/_runner-softhier.yml @@ -37,5 +37,5 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - pytest test_platforms.py -v -n 4 -m "softhier and ${{ inputs.pytest-marker }}" --toolchain=GCC + pytest test_platforms.py -v -n 4 -m "softhier and ${{ inputs.pytest-marker }}" --toolchain=GCC --toolchain-install-dir=$SOFTHIER_INSTALL_DIR/third_party/toolchain/install shell: bash \ No newline at end of file From 3f071a0659b273262ad3b10c92ee1a4c2831d9c4 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 11:38:45 
+0100 Subject: [PATCH 28/51] Format and lint --- .github/workflows/_runner-snitch.yml | 2 +- .github/workflows/_runner-softhier.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_runner-snitch.yml b/.github/workflows/_runner-snitch.yml index 624b3d3aa8..bc599e4fe7 100644 --- a/.github/workflows/_runner-snitch.yml +++ b/.github/workflows/_runner-snitch.yml @@ -42,4 +42,4 @@ jobs: mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache pytest test_platforms.py -v -n 4 -m "snitch and ${{ inputs.pytest-marker }}" - shell: bash \ No newline at end of file + shell: bash diff --git a/.github/workflows/_runner-softhier.yml b/.github/workflows/_runner-softhier.yml index 558c9cd825..b067664f40 100644 --- a/.github/workflows/_runner-softhier.yml +++ b/.github/workflows/_runner-softhier.yml @@ -38,4 +38,4 @@ jobs: mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache pytest test_platforms.py -v -n 4 -m "softhier and ${{ inputs.pytest-marker }}" --toolchain=GCC --toolchain-install-dir=$SOFTHIER_INSTALL_DIR/third_party/toolchain/install - shell: bash \ No newline at end of file + shell: bash From e644108f92c05de394d6370a62a9ef05f9293d1a Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 14:02:27 +0100 Subject: [PATCH 29/51] Remove useless comments and add README for PyTest suite --- DeeployTest/README.md | 60 +++++++++++++++++++++++++++++++++++ DeeployTest/test_platforms.py | 30 ------------------ 2 files changed, 60 insertions(+), 30 deletions(-) create mode 100644 DeeployTest/README.md diff --git a/DeeployTest/README.md b/DeeployTest/README.md new file mode 100644 index 0000000000..0f94916d48 --- /dev/null +++ b/DeeployTest/README.md @@ -0,0 +1,60 @@ + + +# How to use the DeeployTest PyTest Suite? + +### Executing and Collecting Test Groups + +The test suite is located in the `DeeployTest` folder; all commands below are assumed to be executed from there. The test suite is grouped with different markers; you can list the markers with `pytest --markers`. This will return something like: +``` +@pytest.mark.generic: mark test as a Generic platform test +@pytest.mark.cortexm: mark test as a Cortex-M (QEMU-ARM) platform test +@pytest.mark.mempool: mark test as a MemPool platform test +``` + +You can run all tests from a given marker group with `pytest -m <marker> -v`. Each platform has its own marker; for instance, to run all tests from the generic platform, you can use `pytest -m generic -v`. + +You can use boolean expressions on the markers to execute unions or intersections of test groups. For instance, to execute only the kernel tests from the generic platform, you can use `pytest -m 'generic and kernels' -v`. + +To display the tests captured by a given marker or expression, you can use the `--collect-only` flag. 
For instance, to list the kernel tests for the Siracusa + Neureka platform that use L2 as the default memory level and single-buffering, you can use `pytest -m 'siracusa_neureka_tiled and kernels and l2 and singlebuffer' -v --collect-only`, which returns: + +``` +platform linux -- Python 3.10.0, pytest-9.0.2, pluggy-1.6.0 -- /usr/scratch/normandie/jungvi/micromamba/envs/deeploy/bin/python3.10 +cachedir: .pytest_cache +rootdir: /scratch/jungvi/Deeploy/DeeployTest +configfile: pytest.ini +plugins: xdist-3.8.0 +collected 378 items / 370 deselected / 8 selected + +<Module test_platforms.py> +  <Function test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]> +  ... +``` + +### Executing a Single Test + +To run a single test, you can use the test identifier from the `--collect-only` output, for instance `pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]' -v`. + +### Controlling Test Verbosity + +By default, the test output is captured and displayed only if a test fails. If you want to see the captured output, use the `-s` flag. To increase the verbosity of the tests, add more `v`s to the `-v` flag; for instance, `-vvv` will display the commands executed during the test. You can filter the level of the messages from Python's built-in logging module with `--log-cli-level=<LEVEL>`. For instance, the following line shows the commands executed by the tests: +``` +pytest test_platforms.py -m "generic and kernels" -vvv --log-cli-level=DEBUG +``` + +### Parallelized Test Execution + +You can run tests in parallel with the `-n` flag followed by the number of parallel threads. For instance, to run all generic tests with 16 threads, you can use: +``` +pytest test_platforms.py -m generic -v -n 16 +``` + +### Misc + +When running `pytest -m <marker>` in a folder, PyTest will scan every file looking for tests. To speed up collection, you can point it directly at the platform test file, e.g. `pytest test_platforms.py -m <marker>`. 
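As a side note on the README above: the marker expressions it describes compose exactly like the `pytest-marker` inputs of the CI workflows in this patch series. A minimal sketch of two such selections, assuming the same setup as the README (the `not wmem` exclusion and `-n 4` parallelism are taken from the workflows in this series; the `-k` keyword filter mirrors the removed `_runner-siracusa-tiled-models.yml` runner, and the test name passed to it is only illustrative):

```
# Run the Siracusa + Neureka tiled kernel tests while excluding the weight-memory
# variants, as the siracusa-neureka-kernels-tiled-singlebuffer-L2 CI job does
# through its pytest-marker input.
pytest test_platforms.py -v -n 4 -m "siracusa_neureka_tiled and kernels and singlebuffer and l2 and not wmem"

# Narrow a marker group further by test name with -k (test name illustrative),
# mirroring how the removed _runner-siracusa-tiled-models.yml runner selected a single model.
pytest test_platforms.py -v -m "siracusa_tiled and models and l2 and singlebuffer" -k "miniMobileNet-"
```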
diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 29bf235f06..12bcd89672 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -129,7 +129,6 @@ def param_id(param): @pytest.mark.kernels @pytest.mark.parametrize("test_name", GENERIC_KERNEL_TESTS, ids = GENERIC_KERNEL_TESTS) def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Generic platform kernel tests.""" platform_config = PLATFORM_CONFIGS["generic"] config = create_test_config( test_name = test_name, @@ -148,7 +147,6 @@ def test_generic_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.models @pytest.mark.parametrize("test_name", GENERIC_MODEL_TESTS, ids = GENERIC_MODEL_TESTS) def test_generic_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Generic platform model tests.""" platform_config = PLATFORM_CONFIGS["generic"] config = create_test_config( test_name = test_name, @@ -167,7 +165,6 @@ def test_generic_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c @pytest.mark.kernels @pytest.mark.parametrize("test_name", CORTEXM_KERNEL_TESTS, ids = CORTEXM_KERNEL_TESTS) def test_cortexm_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Cortex-M platform kernel tests.""" platform_config = PLATFORM_CONFIGS["cortexm"] config = create_test_config( test_name = test_name, @@ -186,7 +183,6 @@ def test_cortexm_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.models @pytest.mark.parametrize("test_name", CORTEXM_MODEL_TESTS, ids = CORTEXM_MODEL_TESTS) def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Cortex-M platform model tests.""" platform_config = PLATFORM_CONFIGS["cortexm"] config = create_test_config( test_name = test_name, @@ -205,7 +201,6 @@ def test_cortexm_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c @pytest.mark.kernels @pytest.mark.parametrize("test_name", MEMPOOL_KERNEL_TESTS, ids = MEMPOOL_KERNEL_TESTS) def test_mempool_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test MemPool platform kernel tests.""" platform_config = PLATFORM_CONFIGS["mempool"] # Add MemPool-specific CMake args for number of threads @@ -228,7 +223,6 @@ def test_mempool_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.models @pytest.mark.parametrize("test_name", MEMPOOL_MODEL_TESTS, ids = MEMPOOL_MODEL_TESTS) def test_mempool_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test MemPool platform model tests.""" platform_config = PLATFORM_CONFIGS["mempool"] # Add MemPool-specific CMake args for number of threads @@ -251,7 +245,6 @@ def test_mempool_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, c @pytest.mark.kernels @pytest.mark.parametrize("test_name", SIRACUSA_KERNEL_TESTS, ids = SIRACUSA_KERNEL_TESTS) def test_siracusa_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa platform kernel tests (untiled).""" config = create_test_config( test_name = test_name, platform = "Siracusa", @@ -270,7 +263,6 @@ def test_siracusa_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.models 
@pytest.mark.parametrize("test_name", SIRACUSA_MODEL_TESTS, ids = SIRACUSA_MODEL_TESTS) def test_siracusa_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa platform model tests (untiled).""" config = create_test_config( test_name = test_name, platform = "Siracusa", @@ -296,7 +288,6 @@ def test_siracusa_models(test_name, deeploy_test_dir, toolchain, toolchain_dir, ) def test_siracusa_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled kernel tests (L2, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -341,7 +332,6 @@ def test_siracusa_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, t ) def test_siracusa_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled kernel tests (L2, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -371,7 +361,6 @@ def test_siracusa_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, t ) def test_siracusa_tiled_models_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled model tests (L2, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -401,7 +390,6 @@ def test_siracusa_tiled_models_l2_singlebuffer(test_params, deeploy_test_dir, to ) def test_siracusa_tiled_models_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled model tests (L2, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -431,7 +419,6 @@ def test_siracusa_tiled_models_l2_doublebuffer(test_params, deeploy_test_dir, to ) def test_siracusa_tiled_models_l3_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled model tests (L3, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -461,7 +448,6 @@ def test_siracusa_tiled_models_l3_singlebuffer(test_params, deeploy_test_dir, to ) def test_siracusa_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa tiled model tests (L3, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -484,7 +470,6 @@ def test_siracusa_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, to @pytest.mark.kernels @pytest.mark.parametrize("test_name", CHIMERA_KERNEL_TESTS, ids = CHIMERA_KERNEL_TESTS) def test_chimera_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Chimera platform kernel tests.""" platform_config = PLATFORM_CONFIGS["chimera"] config = create_test_config( test_name = test_name, @@ -503,7 +488,6 @@ def test_chimera_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.kernels @pytest.mark.parametrize("test_name", SOFTHIER_KERNEL_TESTS, ids = SOFTHIER_KERNEL_TESTS) def test_softhier_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test 
SoftHier platform kernel tests.""" platform_config = PLATFORM_CONFIGS["softhier"] # Add SoftHier-specific CMake args for number of clusters @@ -526,7 +510,6 @@ def test_softhier_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, @pytest.mark.kernels @pytest.mark.parametrize("test_name", SNITCH_KERNEL_TESTS, ids = SNITCH_KERNEL_TESTS) def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Snitch platform kernel tests.""" platform_config = PLATFORM_CONFIGS["snitch"] # Add Snitch-specific CMake args for number of cores @@ -545,9 +528,6 @@ def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, c run_and_assert_test(test_name, config, skipgen, skipsim) -### Snitch Tiled Platform Tests ### - - def generate_test_params_snitch(test_list, config_name): """Generate test parameters for Snitch tiled tests.""" return [(test_name, l1, config_name) for test_name, l1 in test_list] @@ -564,7 +544,6 @@ def generate_test_params_snitch(test_list, config_name): ) def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Snitch tiled kernel tests (L2, single-buffer).""" test_name, l1, config_name = test_params # Add Snitch-specific CMake args @@ -587,9 +566,6 @@ def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, too run_and_assert_test(test_name, config, skipgen, skipsim) -### Siracusa + Neureka Tiled Platform Tests ### - - @pytest.mark.siracusa_neureka_tiled @pytest.mark.kernels @pytest.mark.singlebuffer @@ -601,7 +577,6 @@ def test_snitch_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, too ) def test_siracusa_neureka_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa + Neureka tiled kernel tests (L2, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -631,7 +606,6 @@ def test_siracusa_neureka_tiled_kernels_l2_singlebuffer(test_params, deeploy_tes ) def test_siracusa_neureka_tiled_kernels_l2_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa + Neureka tiled kernel tests (L2, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -661,7 +635,6 @@ def test_siracusa_neureka_tiled_kernels_l2_doublebuffer(test_params, deeploy_tes ) def test_siracusa_neureka_tiled_models_l3_singlebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa + Neureka tiled model tests (L3, single-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -691,7 +664,6 @@ def test_siracusa_neureka_tiled_models_l3_singlebuffer(test_params, deeploy_test ) def test_siracusa_neureka_tiled_models_l3_doublebuffer(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa + Neureka tiled model tests (L3, double-buffer).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -722,7 +694,6 @@ def test_siracusa_neureka_tiled_models_l3_doublebuffer(test_params, deeploy_test ) def test_siracusa_neureka_tiled_kernels_l2_singlebuffer_wmem(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, 
skipsim) -> None: - """Test Siracusa + Neureka tiled kernel tests (L2, single-buffer, weight memory).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, @@ -754,7 +725,6 @@ def test_siracusa_neureka_tiled_kernels_l2_singlebuffer_wmem(test_params, deeplo ) def test_siracusa_neureka_tiled_models_l3_doublebuffer_wmem(test_params, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, skipsim) -> None: - """Test Siracusa + Neureka tiled model tests (L3, double-buffer, weight memory).""" test_name, l1, config_name = test_params config = create_test_config( test_name = test_name, From 91284fe86961d3aa47e0c8326cb93c49b7ebe3d0 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 14:53:14 +0100 Subject: [PATCH 30/51] Cleanup GitHub action for siracusa with neureka --- ...nner-siracusa-neureka-tiled-sequential.yml | 40 ------------- .../_runner-siracusa-neureka-tiled.yml | 60 +++---------------- .../ci-platform-siracusa-neureka-tiled.yml | 12 ++-- 3 files changed, 15 insertions(+), 97 deletions(-) delete mode 100644 .github/workflows/_runner-siracusa-neureka-tiled-sequential.yml diff --git a/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml b/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml deleted file mode 100644 index b1f5f2fcb3..0000000000 --- a/.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml +++ /dev/null @@ -1,40 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - ---- -name: _runner-siracusa-neureka-tiled-sequential - -"on": - workflow_call: - inputs: - runner: - required: true - type: string - docker-image: - required: true - type: string - pytest-marker: - required: true - type: string - -jobs: - test-runner-siracusa-neureka-tiled: - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.docker-image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. 
- run: | - cd DeeployTest - mkdir -p /app/.ccache - export CCACHE_DIR=/app/.ccache - pytest test_platforms.py -v -n 4 -m "siracusa_neureka_tiled and ${{ inputs.pytest-marker }}" - shell: bash diff --git a/.github/workflows/_runner-siracusa-neureka-tiled.yml b/.github/workflows/_runner-siracusa-neureka-tiled.yml index 3e613a84cb..b1f5f2fcb3 100644 --- a/.github/workflows/_runner-siracusa-neureka-tiled.yml +++ b/.github/workflows/_runner-siracusa-neureka-tiled.yml @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 --- -name: _runner-siracusa-neureka-tiled +name: _runner-siracusa-neureka-tiled-sequential "on": workflow_call: @@ -14,44 +14,12 @@ name: _runner-siracusa-neureka-tiled docker-image: required: true type: string - test-name: + pytest-marker: required: true type: string - num-cores: - required: false - default: 8 - type: number - L1: - required: false - default: "[64000]" - type: string - default-memory-level: - required: false - default: "L2" - type: string - double-buffer: - required: false - default: false - type: boolean - memory-allocation-strategy: - required: false - default: "MiniMalloc" - type: string - search-strategy: - required: false - default: "random-max" - type: string - neureka-wmem: - required: false - default: false - type: boolean jobs: test-runner-siracusa-neureka-tiled: - strategy: - fail-fast: false - matrix: - L1: ${{ fromJSON(inputs.L1) }} runs-on: ${{ inputs.runner }} container: image: ${{ inputs.docker-image }} @@ -63,20 +31,10 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . - - name: Cache ccache - uses: actions/cache/restore@v4 - with: - path: /app/.ccache - key: ccache-ci - - name: Run Test - uses: nick-fields/retry@v3 - with: - timeout_minutes: 5 - max_attempts: 3 - retry_on: timeout - command: | - cd DeeployTest - mkdir -p /app/.ccache - export CCACHE_DIR=/app/.ccache - python testRunner_tiled_siracusa_w_neureka.py -t Tests/${{ inputs.test-name }} --cores=${{ inputs.num-cores }} --l1 ${{ matrix.L1 }} --defaultMemLevel=${{ inputs.default-memory-level }} ${{ inputs.double-buffer && '--doublebuffer' || '' }} ${{ inputs.neureka-wmem && '--neureka-wmem' || '' }} --memAllocStrategy=${{ inputs.memory-allocation-strategy }} --searchStrategy=${{ inputs.search-strategy }} - shell: bash + - name: Run Test # VJUNG: Run tests with 4 parallel threads as GitHub action VM has 4 cores. 
+ run: | + cd DeeployTest + mkdir -p /app/.ccache + export CCACHE_DIR=/app/.ccache + pytest test_platforms.py -v -n 4 -m "siracusa_neureka_tiled and ${{ inputs.pytest-marker }}" + shell: bash diff --git a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml index 5e830a9c11..e76ee648c0 100644 --- a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml @@ -31,7 +31,7 @@ jobs: siracusa-neureka-kernels-tiled-singlebuffer-L2: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} @@ -39,7 +39,7 @@ jobs: siracusa-neureka-kernels-tiled-doublebuffer-L2: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} @@ -47,7 +47,7 @@ jobs: siracusa-neureka-models-tiled-singlebuffer-L3: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} @@ -55,7 +55,7 @@ jobs: siracusa-neureka-models-tiled-doublebuffer-L3: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} @@ -63,7 +63,7 @@ jobs: siracusa-neureka-kernels-tiled-singlebuffer-L2-wmem: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} @@ -71,7 +71,7 @@ jobs: siracusa-neureka-models-tiled-doublebuffer-L3-wmem: needs: select-env - uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml + uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} From 2ba9e44ce1457d40225042f94ddcbb20e7b4cf38 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 14:53:34 +0100 Subject: [PATCH 31/51] Simplify siracusa tiled platform GitHub CI --- .../_runner-siracusa-tiled-kernels.yml | 45 ---------- .../_runner-siracusa-tiled-models.yml | 54 ------------ .github/workflows/_runner-siracusa-tiled.yml | 38 +++++++++ .../workflows/ci-platform-siracusa-tiled.yml | 83 ++++--------------- 4 files changed, 54 insertions(+), 166 deletions(-) delete mode 100644 .github/workflows/_runner-siracusa-tiled-kernels.yml delete mode 100644 .github/workflows/_runner-siracusa-tiled-models.yml create mode 100644 .github/workflows/_runner-siracusa-tiled.yml diff --git a/.github/workflows/_runner-siracusa-tiled-kernels.yml b/.github/workflows/_runner-siracusa-tiled-kernels.yml deleted file mode 100644 index 118807035d..0000000000 --- a/.github/workflows/_runner-siracusa-tiled-kernels.yml +++ /dev/null @@ -1,45 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 
- ---- -name: Siracusa Tiled Kernels Runner - -on: - workflow_call: - inputs: - runner: - required: true - type: string - docker-image: - required: true - type: string - memory-level: - required: true - type: string - description: 'Memory level marker (l2 or l3)' - buffer-mode: - required: true - type: string - description: 'Buffer mode marker (singlebuffer or doublebuffer)' - -jobs: - run-tests: - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.docker-image }} - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Deeploy - run: | - pip install -e . - - - name: Run kernel tests - run: | - cd DeeployTest - pytest test_platforms.py -m "siracusa_tiled and kernels and ${{ inputs.memory-level }} and ${{ inputs.buffer-mode }}" -v diff --git a/.github/workflows/_runner-siracusa-tiled-models.yml b/.github/workflows/_runner-siracusa-tiled-models.yml deleted file mode 100644 index 65b1132ad9..0000000000 --- a/.github/workflows/_runner-siracusa-tiled-models.yml +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - ---- -name: Siracusa Tiled Models Runner - -on: - workflow_call: - inputs: - runner: - required: true - type: string - docker-image: - required: true - type: string - test-name: - required: true - type: string - description: 'Test name to run' - memory-level: - required: true - type: string - description: 'Memory level marker (l2 or l3)' - buffer-mode: - required: true - type: string - description: 'Buffer mode marker (singlebuffer or doublebuffer)' - -jobs: - run-test: - runs-on: ${{ inputs.runner }} - container: - image: ${{ inputs.docker-image }} - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - submodules: recursive - - - name: Install Deeploy - run: | - pip install -e . - - - name: Run model test with retry - uses: nick-fields/retry@v3 - with: - timeout_minutes: 20 - max_attempts: 3 - retry_on: error - command: | - cd DeeployTest - pytest test_platforms.py -k "${{ inputs.test-name }}-" -m "siracusa_tiled and models and ${{ inputs.memory-level }} and ${{ inputs.buffer-mode }}" -v diff --git a/.github/workflows/_runner-siracusa-tiled.yml b/.github/workflows/_runner-siracusa-tiled.yml new file mode 100644 index 0000000000..ea9c8989af --- /dev/null +++ b/.github/workflows/_runner-siracusa-tiled.yml @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +--- +name: _runner-siracusa-tiled + +"on": + workflow_call: + inputs: + runner: + required: true + type: string + docker-image: + required: true + type: string + pytest-marker: + required: true + type: string + +jobs: + test-runner-siracusa-tiled: + runs-on: ${{ inputs.runner }} + container: + image: ${{ inputs.docker-image }} + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Build Deeploy + shell: bash + run: pip install -e . 
+ - name: Run Test + run: | + cd DeeployTest + pytest test_platforms.py -v -m "siracusa_tiled and ${{ inputs.pytest-marker }}" + shell: bash diff --git a/.github/workflows/ci-platform-siracusa-tiled.yml b/.github/workflows/ci-platform-siracusa-tiled.yml index 4094092696..6597f3e625 100644 --- a/.github/workflows/ci-platform-siracusa-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-tiled.yml @@ -29,107 +29,56 @@ jobs: with: docker_image_deeploy: ${{ inputs.docker_image_deeploy }} - generate-matrices: - runs-on: ubuntu-latest - outputs: - l2-singlebuffer-models: ${{ steps.generate.outputs.l2-singlebuffer-models }} - l2-doublebuffer-models: ${{ steps.generate.outputs.l2-doublebuffer-models }} - l3-singlebuffer-models: ${{ steps.generate.outputs.l3-singlebuffer-models }} - l3-doublebuffer-models: ${{ steps.generate.outputs.l3-doublebuffer-models }} - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.10' - - - name: Generate test matrices - id: generate - run: | - chmod +x scripts/generate_test_matrix.py - echo "l2-singlebuffer-models=$(scripts/generate_test_matrix.py l2-singlebuffer-models)" >> $GITHUB_OUTPUT - echo "l2-doublebuffer-models=$(scripts/generate_test_matrix.py l2-doublebuffer-models)" >> $GITHUB_OUTPUT - echo "l3-singlebuffer-models=$(scripts/generate_test_matrix.py l3-singlebuffer-models)" >> $GITHUB_OUTPUT - echo "l3-doublebuffer-models=$(scripts/generate_test_matrix.py l3-doublebuffer-models)" >> $GITHUB_OUTPUT - # Kernel tests - L2 singlebuffer siracusa-kernels-tiled-l2-singlebuffer: needs: select-env - uses: ./.github/workflows/_runner-siracusa-tiled-kernels.yml + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - memory-level: "l2" - buffer-mode: "singlebuffer" + pytest-marker: "kernels and l2 and singlebuffer" # Kernel tests - L2 doublebuffer siracusa-kernels-tiled-l2-doublebuffer: needs: select-env - uses: ./.github/workflows/_runner-siracusa-tiled-kernels.yml + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - memory-level: "l2" - buffer-mode: "doublebuffer" + pytest-marker: "kernels and l2 and doublebuffer" # Model tests - L2 singlebuffer siracusa-models-tiled-l2-singlebuffer: - needs: [select-env, generate-matrices] - strategy: - fail-fast: false - matrix: - test-name: ${{ fromJSON(needs.generate-matrices.outputs.l2-singlebuffer-models) }} - uses: ./.github/workflows/_runner-siracusa-tiled-models.yml + needs: select-env + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-name }} - memory-level: "l2" - buffer-mode: "singlebuffer" + pytest-marker: "models and l2 and singlebuffer" # Model tests - L2 doublebuffer siracusa-models-tiled-l2-doublebuffer: - needs: [select-env, generate-matrices] - strategy: - fail-fast: false - matrix: - test-name: ${{ fromJSON(needs.generate-matrices.outputs.l2-doublebuffer-models) }} - uses: ./.github/workflows/_runner-siracusa-tiled-models.yml + needs: select-env + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-name }} - memory-level: "l2" - buffer-mode: 
"doublebuffer" + pytest-marker: "models and l2 and doublebuffer" # Model tests - L3 singlebuffer siracusa-models-tiled-l3-singlebuffer: - needs: [select-env, generate-matrices] - strategy: - fail-fast: false - matrix: - test-name: ${{ fromJSON(needs.generate-matrices.outputs.l3-singlebuffer-models) }} - uses: ./.github/workflows/_runner-siracusa-tiled-models.yml + needs: select-env + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-name }} - memory-level: "l3" - buffer-mode: "singlebuffer" + pytest-marker: "models and l3 and singlebuffer" # Model tests - L3 doublebuffer siracusa-models-tiled-l3-doublebuffer: - needs: [select-env, generate-matrices] - strategy: - fail-fast: false - matrix: - test-name: ${{ fromJSON(needs.generate-matrices.outputs.l3-doublebuffer-models) }} - uses: ./.github/workflows/_runner-siracusa-tiled-models.yml + needs: select-env + uses: ./.github/workflows/_runner-siracusa-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-name: ${{ matrix.test-name }} - memory-level: "l3" - buffer-mode: "doublebuffer" + pytest-marker: "models and l3 and doublebuffer" From 03820f821728c460aa0b1d0dfc6b388447957f29 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 15:30:34 +0100 Subject: [PATCH 32/51] Remove unused testRunner --- DeeployTest/testRunner_chimera.py | 25 ----- DeeployTest/testRunner_cortexm.py | 16 --- DeeployTest/testRunner_generic.py | 16 --- DeeployTest/testRunner_mempool.py | 23 ---- DeeployTest/testRunner_siracusa.py | 31 ------ DeeployTest/testRunner_siracusa_l3dma.py | 98 ----------------- DeeployTest/testRunner_siracusa_mchandma.py | 99 ----------------- DeeployTest/testRunner_snitch.py | 30 ----- DeeployTest/testRunner_snitch_dma.py | 104 ------------------ DeeployTest/testRunner_softhier.py | 30 ----- DeeployTest/testRunner_tiled_siracusa.py | 24 ---- .../testRunner_tiled_siracusa_w_neureka.py | 49 --------- DeeployTest/testRunner_tiled_snitch.py | 32 ------ 13 files changed, 577 deletions(-) delete mode 100644 DeeployTest/testRunner_chimera.py delete mode 100644 DeeployTest/testRunner_cortexm.py delete mode 100644 DeeployTest/testRunner_generic.py delete mode 100644 DeeployTest/testRunner_mempool.py delete mode 100644 DeeployTest/testRunner_siracusa.py delete mode 100644 DeeployTest/testRunner_siracusa_l3dma.py delete mode 100644 DeeployTest/testRunner_siracusa_mchandma.py delete mode 100644 DeeployTest/testRunner_snitch.py delete mode 100644 DeeployTest/testRunner_snitch_dma.py delete mode 100644 DeeployTest/testRunner_softhier.py delete mode 100644 DeeployTest/testRunner_tiled_siracusa.py delete mode 100644 DeeployTest/testRunner_tiled_siracusa_w_neureka.py delete mode 100644 DeeployTest/testRunner_tiled_snitch.py diff --git a/DeeployTest/testRunner_chimera.py b/DeeployTest/testRunner_chimera.py deleted file mode 100644 index d3a7093a72..0000000000 --- a/DeeployTest/testRunner_chimera.py +++ /dev/null @@ -1,25 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = False, - description = "Deeploy Code Generation Utility for the Chimera Platform (Host, no Tiling).") - - parser.add_argument('--simulator', - metavar = 
"", - dest = "simulator", - type = str, - choices = ["gvsoc"], - default = "gvsoc", - help = "Select the simulator to use") - - args = parser.parse_args() - - testRunner = TestRunner(platform = "Chimera", simulator = args.simulator, tiling = False, argument_parser = parser) - - testRunner.run() diff --git a/DeeployTest/testRunner_cortexm.py b/DeeployTest/testRunner_cortexm.py deleted file mode 100644 index 64d6246b74..0000000000 --- a/DeeployTest/testRunner_cortexm.py +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = False, - description = "Deeploy Code Generation Utility for the ARM (QEMU) Platform (no Tiling).") - args = parser.parse_args() - - testRunner = TestRunner(platform = "QEMU-ARM", simulator = "qemu", tiling = False, argument_parser = parser) - - testRunner.run() diff --git a/DeeployTest/testRunner_generic.py b/DeeployTest/testRunner_generic.py deleted file mode 100644 index 496b1db394..0000000000 --- a/DeeployTest/testRunner_generic.py +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = False, - description = "Deeploy Code Generation Utility for the Generic Platform (Host Machine, no Tiling).") - args = parser.parse_args() - - testRunner = TestRunner(platform = "Generic", simulator = "host", tiling = False, argument_parser = parser) - - testRunner.run() diff --git a/DeeployTest/testRunner_mempool.py b/DeeployTest/testRunner_mempool.py deleted file mode 100644 index 919603f454..0000000000 --- a/DeeployTest/testRunner_mempool.py +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-FileCopyrightText: 2022 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - parser = TestRunnerArgumentParser( - tiling_arguments = False, description = "Deeploy Code Generation Utility for the MemPool Platform (no Tiling).") - - parser.add_argument('-n', - metavar = 'num_threads', - dest = 'num_threads', - type = int, - default = 16, - help = 'Number of parallel threads\n') - args = parser.parse_args() - - testRunner = TestRunner(platform = "MemPool", simulator = "banshee", tiling = False, argument_parser = parser) - - testRunner.cmake_args += f" -D num_threads={args.num_threads}" - - testRunner.run() diff --git a/DeeployTest/testRunner_siracusa.py b/DeeployTest/testRunner_siracusa.py deleted file mode 100644 index b83046de55..0000000000 --- a/DeeployTest/testRunner_siracusa.py +++ /dev/null @@ -1,31 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = False, - description = "Deeploy Code Generation Utility for the Siracusa Platform (no Tiling).") - - parser.add_argument('--cores', - metavar = '', - dest = 'cores', - type = int, - default = 8, - help = 'Set number of cluster cores') - - parser.add_argument('--profileUntiled', - action = 'store_true', - dest = 
'profileUntiled', - default = False, - help = 'Profile Untiled') - - args = parser.parse_args() - - testRunner = TestRunner(platform = "Siracusa", simulator = "gvsoc", tiling = False, argument_parser = parser) - - testRunner.cmake_args += f" -D NUM_CORES={args.cores}" - testRunner.run() diff --git a/DeeployTest/testRunner_siracusa_l3dma.py b/DeeployTest/testRunner_siracusa_l3dma.py deleted file mode 100644 index b70d8dda22..0000000000 --- a/DeeployTest/testRunner_siracusa_l3dma.py +++ /dev/null @@ -1,98 +0,0 @@ -# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -import os - -import numpy as np -from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser -from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType - -from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ - MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity -from Deeploy.Targets.PULPOpen.Bindings import L3MemoryAwareFunctionCallClosure, TilingCallClosure -from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling -from Deeploy.Targets.PULPOpen.DMA.L3Dma import l3DmaHack -from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ - TilingVariableReplacementUpdate -from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings - -testRunnerArgumentParser = TestRunnerArgumentParser(tiling_arguments = True) -testRunnerArgumentParser.add_argument('--input-shape', - nargs = '+', - required = True, - dest = 'input_shape', - type = int, - help = "Shape of the copied tensor") -testRunnerArgumentParser.add_argument('--tile-shape', - nargs = '+', - required = True, - dest = 'tile_shape', - type = int, - help = "Shape of the tiles produced in the manual tiling solution") -testRunnerArgumentParser.add_argument('--node-count', - dest = 'node_count', - type = int, - default = 1, - help = "Number of generated memcpy nodes") -testRunnerArgumentParser.add_argument('--type', type = str, default = "uint8_t", help = "Tensor elements datatype") -testRunner = TestRunner('Siracusa', 'gvsoc', True, testRunnerArgumentParser) - -inputShape = testRunner._args.input_shape -tileShape = testRunner._args.tile_shape -node_count = testRunner._args.node_count -_type = baseTypeFromName(testRunner._args.type) -dtype = dtypeFromDeeployType(_type) -defaultMemory = "L3" -targetMemory = "L2" - -assert len(inputShape) == len(tileShape), \ - f'Input and tile shape should be of the same dimensionality. Received {len(inputShape)}D input shape vs. {len(tileShape)}D tile shape.' -assert all(tileDim <= inDim for inDim, tileDim in zip(inputShape, tileShape)), \ - f'Each tile shape dimension should be smaller then the corresponding input one. 
Received {tileShape} > {inputShape}' - -graph = generate_graph(node_count, inputShape, dtype) -inputTypes = {"input_0": PointerClass(_type)} -_DEEPLOYSTATEDIR = os.path.join(testRunner._dir_gen, "deeployStates") -deployer = setup_pulp_deployer(defaultMemory, targetMemory, graph, inputTypes, testRunner._args.doublebuffer, - _DEEPLOYSTATEDIR) - -transformer = CodeTransformation([ - TilingVariableReplacement(targetMemory), - TilingCallClosure(writeback = False, generateStruct = True), - TilingVariableReplacementUpdate(targetMemory), - PULPL3Tiling("L3", "L2", l3DmaHack), - ArgumentStructGeneration(), - L3MemoryAwareFunctionCallClosure(writeback = False), - MemoryManagementGeneration("L2"), - MemoryManagementGeneration("L3.*"), - MemoryManagementGeneration(), -]) - -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) -memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) -memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} -deployer.Platform.engines[0].Mapping.update(memcpyMapping) - -prepare_deployer_with_custom_tiling(deployer, defaultMemory, targetMemory, tileShape, testRunner._args.doublebuffer) - -if not testRunner._args.skipgen: - if dtype == np.float32: - test_inputs = np.random.rand(*inputShape) - else: - info = np.iinfo(dtype) - test_inputs = np.arange(stop = np.prod(inputShape), dtype = dtype).reshape(inputShape) - test_outputs = test_inputs - generateTestNetwork(deployer, [test_inputs], [test_outputs], testRunner._dir_gen, _NoVerbosity) - -# Deconstructed testRunner.run() with skipped generation because we did the generation already -testRunner.configure_cmake_project() -testRunner.build_binary() -if not testRunner._args.skipsim: - testRunner.run_simulation() diff --git a/DeeployTest/testRunner_siracusa_mchandma.py b/DeeployTest/testRunner_siracusa_mchandma.py deleted file mode 100644 index 56ed6f5a14..0000000000 --- a/DeeployTest/testRunner_siracusa_mchandma.py +++ /dev/null @@ -1,99 +0,0 @@ -# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -import os - -import numpy as np -from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser -from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType - -from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ - MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity -from Deeploy.Targets.PULPOpen.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure -from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling -from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma -from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ - TilingVariableReplacementUpdate -from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings - -testRunnerArgumentParser = TestRunnerArgumentParser(tiling_arguments = True) -testRunnerArgumentParser.add_argument('--input-shape', - nargs = '+', - required = True, - dest = 'input_shape', - type 
= int, - help = "Shape of the copied tensor") -testRunnerArgumentParser.add_argument('--tile-shape', - nargs = '+', - required = True, - dest = 'tile_shape', - type = int, - help = "Shape of the tiles produced in the manual tiling solution") -testRunnerArgumentParser.add_argument('--node-count', - dest = 'node_count', - type = int, - default = 1, - help = "Number of generated memcpy nodes") -testRunnerArgumentParser.add_argument('--type', type = str, default = "uint8_t", help = "Tensor elements datatype") -testRunner = TestRunner('Siracusa', 'gvsoc', True, testRunnerArgumentParser) - -inputShape = testRunner._args.input_shape -tileShape = testRunner._args.tile_shape -node_count = testRunner._args.node_count -_type = baseTypeFromName(testRunner._args.type) -dtype = dtypeFromDeeployType(_type) -defaultMemory = "L2" -targetMemory = "L1" - -assert len(inputShape) == len(tileShape), \ - f'Input and tile shape should be of the same dimensionality. Received {len(inputShape)}D input shape vs. {len(tileShape)}D tile shape.' -assert all(tileDim <= inDim for inDim, tileDim in zip(inputShape, tileShape)), \ - f'Each tile shape dimension should be smaller then the corresponding input one. Received {tileShape} > {inputShape}' - -graph = generate_graph(node_count, inputShape, dtype) -inputTypes = {"input_0": PointerClass(_type)} -_DEEPLOYSTATEDIR = os.path.join(testRunner._dir_gen, "deeployStates") -deployer = setup_pulp_deployer(defaultMemory, targetMemory, graph, inputTypes, testRunner._args.doublebuffer, - _DEEPLOYSTATEDIR) - -transformer = CodeTransformation([ - TilingVariableReplacement(targetMemory), - TilingCallClosure(writeback = False, generateStruct = True), - TilingVariableReplacementUpdate(targetMemory), - PULPClusterTiling(defaultMemory, targetMemory, MchanDma()), - ArgumentStructGeneration(), - MemoryManagementGeneration(targetMemory), - TilingVariableReplacement(defaultMemory), - MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), - MemoryManagementGeneration(defaultMemory), - MemoryManagementGeneration(), -]) - -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) -memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) -memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} -deployer.Platform.engines[0].Mapping.update(memcpyMapping) - -prepare_deployer_with_custom_tiling(deployer, defaultMemory, targetMemory, tileShape, testRunner._args.doublebuffer) - -if not testRunner._args.skipgen: - if dtype == np.float32: - test_inputs = np.random.rand(*inputShape) - else: - info = np.iinfo(dtype) - test_inputs = np.arange(stop = np.prod(inputShape), dtype = dtype).reshape(inputShape) - test_outputs = test_inputs - generateTestNetwork(deployer, [test_inputs], [test_outputs], testRunner._dir_gen, _NoVerbosity) - -# Deconstructed testRunner.run() with skipped generation because we did the generation already -testRunner.configure_cmake_project() -testRunner.build_binary() -if not testRunner._args.skipsim: - testRunner.run_simulation() diff --git a/DeeployTest/testRunner_snitch.py b/DeeployTest/testRunner_snitch.py deleted file mode 100644 index 42a7353818..0000000000 --- a/DeeployTest/testRunner_snitch.py +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = 
TestRunnerArgumentParser( - tiling_arguments = False, description = "Deeploy Code Generation Utility for the Snitch Platform (no Tiling).") - - parser.add_argument('--cores', - metavar = '', - dest = 'cores', - type = int, - default = 9, - help = 'Set number of cluster cores') - parser.add_argument('--simulator', - metavar = "", - dest = "simulator", - type = str, - choices = ["gvsoc", "banshee", "vsim", "vsim.gui"], - default = "gvsoc", - help = "Select the simulator to use") - args = parser.parse_args() - - testRunner = TestRunner(platform = "Snitch", simulator = args.simulator, tiling = False, argument_parser = parser) - - testRunner.cmake_args += f" -D NUM_CORES={args.cores}" - testRunner.run() diff --git a/DeeployTest/testRunner_snitch_dma.py b/DeeployTest/testRunner_snitch_dma.py deleted file mode 100644 index 80073ac5ed..0000000000 --- a/DeeployTest/testRunner_snitch_dma.py +++ /dev/null @@ -1,104 +0,0 @@ -# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -import os - -import numpy as np -from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_snitch_deployer -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser -from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType - -from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ - MemoryManagementGeneration -from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity -from Deeploy.Targets.Snitch.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure -from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling -from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchClusterSynch import SnitchSynchCoresPass -from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchCoreFilter import SnitchCoreFilterPass -from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchProfileExecutionBlock import SnitchProfileExecutionBlockPass -from Deeploy.Targets.Snitch.DMA.SnitchDma import SnitchDma -from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ - TilingVariableReplacementUpdate -from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings - -testRunnerArgumentParser = TestRunnerArgumentParser(tiling_arguments = True) -testRunnerArgumentParser.add_argument('--input-shape', - nargs = '+', - required = True, - dest = 'input_shape', - type = int, - help = "Shape of the copied tensor") -testRunnerArgumentParser.add_argument('--tile-shape', - nargs = '+', - required = True, - dest = 'tile_shape', - type = int, - help = "Shape of the tiles produced in the manual tiling solution") -testRunnerArgumentParser.add_argument('--node-count', - dest = 'node_count', - type = int, - default = 1, - help = "Number of generated memcpy nodes") -testRunnerArgumentParser.add_argument('--type', type = str, default = "uint8_t", help = "Tensor elements datatype") -testRunner = TestRunner('Snitch', 'gvsoc', tiling = True, argument_parser = testRunnerArgumentParser) - -inputShape = testRunner._args.input_shape -tileShape = testRunner._args.tile_shape -node_count = testRunner._args.node_count -_type = baseTypeFromName(testRunner._args.type) -dtype = 
dtypeFromDeeployType(_type) -defaultMemory = "L2" -targetMemory = "L1" - -assert len(inputShape) == len(tileShape), \ - f'Input and tile shape should be of the same dimensionality. Received {len(inputShape)}D input shape vs. {len(tileShape)}D tile shape.' -assert all(tileDim <= inDim for inDim, tileDim in zip(inputShape, tileShape)), \ - f'Each tile shape dimension should be smaller then the corresponding input one. Received {tileShape} > {inputShape}' - -graph = generate_graph(node_count, inputShape, dtype) -inputTypes = {"input_0": PointerClass(_type)} -_DEEPLOYSTATEDIR = os.path.join(testRunner._dir_gen, "deeployStates") -deployer = setup_snitch_deployer(defaultMemory, targetMemory, graph, inputTypes, testRunner._args.doublebuffer, - _DEEPLOYSTATEDIR) - -transformer = CodeTransformation([ - SnitchCoreFilterPass("compute"), - SnitchProfileExecutionBlockPass(), - TilingVariableReplacement(targetMemory), - TilingCallClosure(writeback = False), - SnitchSynchCoresPass(), - TilingVariableReplacementUpdate(targetMemory), - SnitchClusterTiling(defaultMemory, targetMemory, SnitchDma()), - ArgumentStructGeneration(), - MemoryManagementGeneration(targetMemory), - MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), - MemoryManagementGeneration(defaultMemory), - MemoryManagementGeneration(), -]) - -binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) -tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) -memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) -memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} -deployer.Platform.engines[0].Mapping.update(memcpyMapping) - -prepare_deployer_with_custom_tiling(deployer, defaultMemory, targetMemory, tileShape, testRunner._args.doublebuffer) - -if not testRunner._args.skipgen: - if dtype == np.float32: - test_inputs = np.random.rand(*inputShape) - else: - info = np.iinfo(dtype) - test_inputs = np.arange(stop = np.prod(inputShape), dtype = dtype).reshape(inputShape) - test_outputs = test_inputs - generateTestNetwork(deployer, [test_inputs], [test_outputs], testRunner._dir_gen, _NoVerbosity) - -# Deconstructed testRunner.run() with skipped generation because we did the generation already -testRunner.configure_cmake_project() -testRunner.build_binary() -if not testRunner._args.skipsim: - testRunner.run_simulation() diff --git a/DeeployTest/testRunner_softhier.py b/DeeployTest/testRunner_softhier.py deleted file mode 100644 index 9350dce4cf..0000000000 --- a/DeeployTest/testRunner_softhier.py +++ /dev/null @@ -1,30 +0,0 @@ -# SPDX-FileCopyrightText: 2024 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - parser = TestRunnerArgumentParser( - tiling_arguments = False, - description = "Deeploy Code Generation Utility for the Single Cluster SoftHier (no Tiling).") - - parser.add_argument('--num_clusters', - metavar = 'num_clusters', - dest = 'num_clusters', - type = int, - default = 1, - help = 'Number of clusters\n') - - parser.add_argument('--verbose', metavar = 'verbose', dest = 'verbose', type = int, default = 2, help = 'verbose\n') - - for action in parser._actions: - if action.dest == 'toolchain_install_dir': - action.default = "${SOFTHIER_INSTALL_DIR}/third_party/toolchain/install" - args = parser.parse_args() - - testRunner = TestRunner(platform = "SoftHier", simulator = "gvsoc", tiling = False, argument_parser = parser) - - 
testRunner.cmake_args += f" -D num_clusters={args.num_clusters}" - - testRunner.run() diff --git a/DeeployTest/testRunner_tiled_siracusa.py b/DeeployTest/testRunner_tiled_siracusa.py deleted file mode 100644 index 7bf08b7b28..0000000000 --- a/DeeployTest/testRunner_tiled_siracusa.py +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = True, description = "Deeploy Code Generation Utility for the Siracusa Platform (Tiling).") - - parser.add_argument('--cores', - metavar = '', - dest = 'cores', - type = int, - default = 8, - help = 'Set number of cluster cores') - args = parser.parse_args() - - testRunner = TestRunner(platform = "Siracusa", simulator = "gvsoc", tiling = True, argument_parser = parser) - - testRunner.cmake_args += f" -D NUM_CORES={args.cores}" - - testRunner.run() diff --git a/DeeployTest/testRunner_tiled_siracusa_w_neureka.py b/DeeployTest/testRunner_tiled_siracusa_w_neureka.py deleted file mode 100644 index 435f32b895..0000000000 --- a/DeeployTest/testRunner_tiled_siracusa_w_neureka.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser( - tiling_arguments = True, - description = "Deeploy Code Generation Utility for the Siracusa Platform (Tiling & NEureka).") - - parser.add_argument('--cores', - metavar = '', - dest = 'cores', - type = int, - default = 8, - help = 'Set number of cluster cores') - parser.add_argument('--neureka-wmem', - dest = "neureka_wmem", - action = "store_true", - default = False, - help = 'Adds weight memory and neureka engine color\n') - parser.add_argument('--enable-3x3', - dest = "enable_3x3", - action = "store_true", - default = False, - help = 'Adds EXPERIMENTAL support for 3x3 convolutions on N-EUREKA\n') - parser.add_argument('--enableStrides', - dest = "enableStrides", - action = "store_true", - default = False, - help = 'Adds EXPERIMENTAL support for strided convolutions on N-EUREKA\n') - args = parser.parse_args() - - testRunner = TestRunner(platform = "Siracusa_w_neureka", - simulator = "gvsoc", - tiling = True, - argument_parser = parser) - - testRunner.cmake_args += f" -D NUM_CORES={args.cores}" - if args.neureka_wmem: - testRunner.gen_args += " --neureka-wmem" - if args.enable_3x3: - testRunner.gen_args += " --enable-3x3" - if args.enableStrides: - testRunner.gen_args += " --enableStrides" - - testRunner.run() diff --git a/DeeployTest/testRunner_tiled_snitch.py b/DeeployTest/testRunner_tiled_snitch.py deleted file mode 100644 index 7787d1f844..0000000000 --- a/DeeployTest/testRunner_tiled_snitch.py +++ /dev/null @@ -1,32 +0,0 @@ -# SPDX-FileCopyrightText: 2023 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - -from testUtils.testRunner import TestRunner, TestRunnerArgumentParser - -if __name__ == "__main__": - - parser = TestRunnerArgumentParser(tiling_arguments = True, - description = "Deeploy Code Generation Utility for the Snitch Platform (Tiling).") - - parser.add_argument('--cores', - metavar = '', - dest = 'cores', - type = int, - default = 9, - help = 'Set number of cluster cores') - parser.add_argument('--simulator', - metavar 
= "", - dest = "simulator", - type = str, - choices = ["gvsoc", "banshee", "vsim", "vsim.gui"], - default = "gvsoc", - help = "Select the simulator to use") - - args = parser.parse_args() - - testRunner = TestRunner(platform = "Snitch", simulator = args.simulator, tiling = True, argument_parser = parser) - - testRunner.cmake_args += f" -D NUM_CORES={args.cores}" - - testRunner.run() From 597a8a228cc9087abaf5db88399fcb03bbc375b0 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 15:30:50 +0100 Subject: [PATCH 33/51] Update ccache generation to use PyTest suite --- .github/workflows/infra-generate-ccache.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/infra-generate-ccache.yml b/.github/workflows/infra-generate-ccache.yml index 721f09870b..9c1c9de8ff 100644 --- a/.github/workflows/infra-generate-ccache.yml +++ b/.github/workflows/infra-generate-ccache.yml @@ -34,15 +34,15 @@ jobs: cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - python testRunner_generic.py -t ./Tests/Adder - python testRunner_mempool.py -t ./Tests/Adder - python testRunner_cortexm.py -t ./Tests/Adder - python testRunner_snitch.py -t ./Tests/Adder - python testRunner_tiled_snitch.py -t ./Tests/Adder - python testRunner_siracusa.py -t ./Tests/Adder - python testRunner_tiled_siracusa.py -t ./Tests/Adder - python testRunner_tiled_siracusa_w_neureka.py -t ./Tests/Adder - python testRunner_chimera.py -t ./Tests/Adder + pytest 'test_platforms.py::test_generic_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_mempool_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_cortexm_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_snitch_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_snitch_tiled_kernels[Adder-128000-L2]' --skipsim + pytest 'test_platforms.py::test_siracusa_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_siracusa_tiled_kernels_singlebuffer_l2[Adder-64000-L2-singlebuffer]' --skipsim + pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]' --skipsim + pytest 'test_platforms.py::test_chimera_kernels[Adder]' --skipsim - name: Clean and Upload CCache uses: actions/cache@v4 with: From dd4d3430f16a887a07d203d49a8597412b578be4 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Tue, 6 Jan 2026 15:43:38 +0100 Subject: [PATCH 34/51] Cleanup generate_test_param function --- DeeployTest/test_platforms.py | 7 +---- DeeployTest/test_snitch_tiled_config.py | 39 +++++++++---------------- 2 files changed, 15 insertions(+), 31 deletions(-) diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 12bcd89672..32fece84ec 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -528,18 +528,13 @@ def test_snitch_kernels(test_name, deeploy_test_dir, toolchain, toolchain_dir, c run_and_assert_test(test_name, config, skipgen, skipsim) -def generate_test_params_snitch(test_list, config_name): - """Generate test parameters for Snitch tiled tests.""" - return [(test_name, l1, config_name) for test_name, l1 in test_list] - - @pytest.mark.snitch_tiled @pytest.mark.kernels @pytest.mark.singlebuffer @pytest.mark.l2 @pytest.mark.parametrize( "test_params", - generate_test_params_snitch(SNITCH_L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), + generate_test_params(SNITCH_L2_SINGLEBUFFER_KERNELS, "L2-singlebuffer"), ids = param_id, ) def test_snitch_tiled_kernels_l2_singlebuffer(test_params, 
deeploy_test_dir, toolchain, toolchain_dir, cmake_args, diff --git a/DeeployTest/test_snitch_tiled_config.py b/DeeployTest/test_snitch_tiled_config.py index 8fdedf6b42..4e3662da05 100644 --- a/DeeployTest/test_snitch_tiled_config.py +++ b/DeeployTest/test_snitch_tiled_config.py @@ -9,31 +9,20 @@ DEFAULT_NUM_CORES = 9 # L2 single-buffer tests with different L1 sizes -# Format: (test_name, L1_size) -L2_SINGLEBUFFER_KERNELS = [ - ("TestiNoNorm", 5000), - ("TestiNoNorm", 10000), - ("TestAdderLarge", 5000), - ("TestAdderLarge", 10000), - ("TestiSoftmaxLarge", 5000), - ("TestiSoftmaxLarge", 10000), - ("testRQGEMM", 2000), - ("testRQGEMM", 5000), - ("testFloatSoftmax", 2000), - ("testFloatSoftmax", 5000), - ("testFloatSoftmax", 10000), - ("TestRQAdd", 5000), - ("TestRQAdd", 10000), - ("testFloatGEMM", 2000), - ("testFloatGEMM", 5000), - ("testFloatGEMM", 10000), - ("testFloatGEMMtransB", 2000), - ("testFloatGEMMtransB", 5000), - ("testFloatGEMMtransB", 10000), -] +# Format: {test_name: [L1_sizes]} +L2_SINGLEBUFFER_KERNELS = { + "TestiNoNorm": [5000, 10000], + "TestAdderLarge": [5000, 10000], + "TestiSoftmaxLarge": [5000, 10000], + "testRQGEMM": [2000, 5000], + "testFloatSoftmax": [2000, 5000, 10000], + "TestRQAdd": [5000, 10000], + "testFloatGEMM": [2000, 5000, 10000], + "testFloatGEMMtransB": [2000, 5000, 10000], +} -L2_SINGLEBUFFER_MODELS = [] +L2_SINGLEBUFFER_MODELS = {} # Currently no double-buffer configurations in CI -L2_DOUBLEBUFFER_KERNELS = [] -L2_DOUBLEBUFFER_MODELS = [] +L2_DOUBLEBUFFER_KERNELS = {} +L2_DOUBLEBUFFER_MODELS = {} From 77634a4abb6f18f0baba03e18f3b2b4ee19f4b88 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 8 Jan 2026 11:56:15 +0100 Subject: [PATCH 35/51] DMA test migration to pytest works but only when running test one by one (state contamination) --- DeeployTest/conftest.py | 4 + DeeployTest/test_dmas.py | 337 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 341 insertions(+) create mode 100644 DeeployTest/test_dmas.py diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index dc200daaf8..15617d0f19 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -67,6 +67,10 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "l2: mark test as L2 default memory level") config.addinivalue_line("markers", "l3: mark test as L3 default memory level") config.addinivalue_line("markers", "wmem: mark test as using Neureka weight memory") + config.addinivalue_line("markers", "dma: mark test as DMA test") + config.addinivalue_line("markers", "mchan_dma: mark test as MchanDma test (Siracusa L2→L1)") + config.addinivalue_line("markers", "l3_dma: mark test as L3Dma test (Siracusa L3→L2)") + config.addinivalue_line("markers", "snitch_dma: mark test as SnitchDma test (Snitch L2→L1)") config.addinivalue_line("markers", "slow: mark test as slow running") # Configure logging based on verbosity diff --git a/DeeployTest/test_dmas.py b/DeeployTest/test_dmas.py new file mode 100644 index 0000000000..396b4a1fc5 --- /dev/null +++ b/DeeployTest/test_dmas.py @@ -0,0 +1,337 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 +"""DMA test suite for Siracusa and Snitch platforms. 
+ +Tests three DMA implementations across various tensor shapes and configurations: +- MchanDma: Siracusa L2→L1 DMA transfers +- L3Dma: Siracusa L3→L2 DMA transfers +- SnitchDma: Snitch L2→L1 DMA transfers + +Total test matrix: 3 DMAs × 10 shapes × 2 buffering modes = 60 tests +""" + +import os +import shutil +from pathlib import Path + +import numpy as np +import pytest +from testUtils.codeGenerate import generateTestNetwork +from testUtils.dmaUtils import (MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, + memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer, + setup_snitch_deployer) +from testUtils.pytestRunner import build_binary, configure_cmake, get_test_paths, get_worker_id +from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType + +from Deeploy.AbstractDataTypes import PointerClass +from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import (ArgumentStructGeneration, + MemoryManagementGeneration) +from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity +from Deeploy.Targets.PULPOpen.Bindings import MemoryAwareFunctionCallClosure as PULPMemoryAwareFunctionCallClosure +from Deeploy.Targets.PULPOpen.Bindings import TilingCallClosure as PULPTilingCallClosure +from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling +from Deeploy.Targets.PULPOpen.DMA.L3Dma import L3Dma +from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma +from Deeploy.Targets.Snitch.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure +from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling +from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchClusterSynch import SnitchSynchCoresPass +from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchCoreFilter import SnitchCoreFilterPass +from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchProfileExecutionBlock import SnitchProfileExecutionBlockPass +from Deeploy.Targets.Snitch.DMA.SnitchDma import SnitchDma +from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import ( + TilingVariableReplacement, TilingVariableReplacementUpdate) +from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings + +# Test shape configurations: (input_shape, tile_shape, node_count, data_type) +DMA_TEST_SHAPES = [ + ((10, 10), (10, 10), 1, "uint8_t"), + ((10, 10), (10, 4), 1, "uint8_t"), + ((10, 10), (10, 4), 1, "uint16_t"), + ((10, 10), (10, 4), 1, "uint32_t"), + ((10, 10), (3, 4), 1, "uint32_t"), + ((10, 10), (3, 4), 2, "uint32_t"), + ((10, 10, 10), (2, 3, 4), 1, "uint8_t"), + ((10, 10, 10, 10), (2, 3, 5, 4), 1, "uint8_t"), + ((10, 10, 10, 10), (2, 3, 5, 4), 1, "uint32_t"), + ((10, 10, 10, 10, 10), (2, 3, 5, 7, 4), 1, "uint8_t"), +] + + +def param_id_dma(val): + """Generate readable test IDs for DMA parametrized tests.""" + if isinstance(val, tuple) and len(val) == 4: + input_shape, tile_shape, node_count, data_type = val + shape_str = "x".join(map(str, input_shape)) + tile_str = "x".join(map(str, tile_shape)) + return f"{shape_str}_tile{tile_str}_n{node_count}_{data_type}" + elif isinstance(val, bool): + return "doublebuffer" if val else "singlebuffer" + return str(val) + + +def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, node_count: int, data_type: str, + doublebuffer: bool, gen_dir: str): + """ + Set up deployer for DMA testing with custom tiling. 
+ + Args: + dma_type: DMA implementation ("MchanDma", "L3Dma", "SnitchDma") + input_shape: Tensor shape to copy + tile_shape: Tiling dimensions + node_count: Number of memcpy nodes + data_type: Data type (uint8_t, uint16_t, uint32_t) + doublebuffer: Enable double buffering + gen_dir: Generation directory + + Returns: + tuple: (deployer, test_inputs, test_outputs) + """ + _type = baseTypeFromName(data_type) + dtype = dtypeFromDeeployType(_type) + + # Validate shapes + assert len(input_shape) == len(tile_shape), \ + f'Input and tile shape must have same dimensionality: {len(input_shape)}D vs {len(tile_shape)}D' + assert all(tileDim <= inDim for inDim, tileDim in zip(input_shape, tile_shape)), \ + f'Tile shape {tile_shape} must be <= input shape {input_shape}' + + # DMA-specific configuration + if dma_type == "MchanDma": + defaultMemory = "L2" + targetMemory = "L1" + dma_obj = MchanDma() + elif dma_type == "L3Dma": + defaultMemory = "L3" + targetMemory = "L2" + dma_obj = L3Dma() + elif dma_type == "SnitchDma": + defaultMemory = "L2" + targetMemory = "L1" + dma_obj = SnitchDma() + else: + raise ValueError(f"Unknown DMA type: {dma_type}") + + # Generate graph and setup deployer + graph = generate_graph(node_count, input_shape, dtype) + inputTypes = {"input_0": PointerClass(_type)} + _DEEPLOYSTATEDIR = os.path.join(gen_dir, "deeployStates") + + if dma_type == "SnitchDma": + deployer = setup_snitch_deployer(defaultMemory, targetMemory, graph, inputTypes, doublebuffer, _DEEPLOYSTATEDIR) + else: + deployer = setup_pulp_deployer(defaultMemory, targetMemory, graph, inputTypes, doublebuffer, _DEEPLOYSTATEDIR) + + # Create transformer with DMA-specific passes + if dma_type == "SnitchDma": + transformer = CodeTransformation([ + SnitchCoreFilterPass("compute"), + SnitchProfileExecutionBlockPass(), + TilingVariableReplacement(targetMemory), + TilingCallClosure(writeback = False), + SnitchSynchCoresPass(), + TilingVariableReplacementUpdate(targetMemory), + SnitchClusterTiling(defaultMemory, targetMemory, dma_obj), + ArgumentStructGeneration(), + MemoryManagementGeneration(targetMemory), + MemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), + MemoryManagementGeneration(defaultMemory), + MemoryManagementGeneration(), + ]) + else: # MchanDma, L3Dma + transformer = CodeTransformation([ + TilingVariableReplacement(targetMemory), + PULPTilingCallClosure(writeback = False, generateStruct = True), + TilingVariableReplacementUpdate(targetMemory), + PULPClusterTiling(defaultMemory, targetMemory, dma_obj), + ArgumentStructGeneration(), + MemoryManagementGeneration(targetMemory), + TilingVariableReplacement(defaultMemory), + PULPMemoryAwareFunctionCallClosure(writeback = False, generateStruct = True), + MemoryManagementGeneration(defaultMemory), + MemoryManagementGeneration(), + ]) + + # Set up bindings + binding = NodeBinding(MemcpyTypeChecker(), memcpyTemplate, transformer) + tilingReadyBindings = TilingReadyNodeBindings([binding], MemcpyTileConstraint()) + memcpyMapper = NodeMapper(MemcpyParser(), tilingReadyBindings) + memcpyMapping = {"Memcpy": MemcpyLayer([memcpyMapper])} + deployer.Platform.engines[0].Mapping.update(memcpyMapping) + + # Prepare custom tiling + prepare_deployer_with_custom_tiling(deployer, defaultMemory, targetMemory, tile_shape, doublebuffer) + + # Generate test inputs/outputs + if dtype == np.float32: + test_inputs = np.random.rand(*input_shape) + else: + test_inputs = np.arange(stop = np.prod(input_shape), dtype = dtype).reshape(input_shape) + test_outputs = test_inputs 
+ + return deployer, test_inputs, test_outputs + + +@pytest.mark.dma +@pytest.mark.mchan_dma +@pytest.mark.siracusa_tiled +@pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) +@pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) +def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, + skipsim) -> None: + """Test MchanDma (Siracusa L2→L1 DMA transfers).""" + input_shape, tile_shape, node_count, data_type = test_shape + + # Setup paths + test_name = f"testMchanDma_{param_id_dma(test_shape)}_{param_id_dma(doublebuffer)}" + platform = "Siracusa" + gen_dir, _, test_name_clean = get_test_paths(f"test_dma_gen/{test_name}", platform, base_dir = deeploy_test_dir) + + # Generate network + if not skipgen: + # Clean gen_dir to avoid stale state + if os.path.exists(gen_dir): + shutil.rmtree(gen_dir) + deployer, test_inputs, test_outputs = setup_dma_deployer("MchanDma", input_shape, tile_shape, node_count, + data_type, doublebuffer, gen_dir) + generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) + + # Build and run + worker_id = get_worker_id() + if worker_id == "master": + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / "build_master") + else: + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + from testUtils.pytestRunner import DeeployTestConfig + config = DeeployTestConfig( + test_name = test_name_clean, + test_dir = gen_dir, + platform = platform, + simulator = 'gvsoc', + tiling = True, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = toolchain, + toolchain_install_dir = toolchain_dir, + cmake_args = list(cmake_args) + ["NUM_CORES=8"], + ) + + configure_cmake(config) + build_binary(config) + + if not skipsim: + from testUtils.pytestRunner import run_simulation + result = run_simulation(config) + assert result.success, f"MchanDma test failed with {result.error_count} errors" + assert result.error_count == 0, f"Found {result.error_count} errors" + + +@pytest.mark.dma +@pytest.mark.l3_dma +@pytest.mark.siracusa_tiled +@pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) +@pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) +def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, + skipsim) -> None: + """Test L3Dma (Siracusa L3→L2 DMA transfers).""" + input_shape, tile_shape, node_count, data_type = test_shape + + # Setup paths + test_name = f"testL3Dma_{param_id_dma(test_shape)}_{param_id_dma(doublebuffer)}" + platform = "Siracusa" + gen_dir, _, test_name_clean = get_test_paths(f"test_dma_gen/{test_name}", platform, base_dir = deeploy_test_dir) + + # Generate network + if not skipgen: + # Clean gen_dir to avoid stale state + if os.path.exists(gen_dir): + shutil.rmtree(gen_dir) + deployer, test_inputs, test_outputs = setup_dma_deployer("L3Dma", input_shape, tile_shape, node_count, + data_type, doublebuffer, gen_dir) + generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) + + # Build and run + worker_id = get_worker_id() + if worker_id == "master": + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / "build_master") + else: + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + from testUtils.pytestRunner import DeeployTestConfig + config = DeeployTestConfig( + test_name = test_name_clean, + test_dir = 
gen_dir, + platform = platform, + simulator = 'gvsoc', + tiling = True, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = toolchain, + toolchain_install_dir = toolchain_dir, + cmake_args = list(cmake_args) + ["NUM_CORES=8"], + ) + + configure_cmake(config) + build_binary(config) + + if not skipsim: + from testUtils.pytestRunner import run_simulation + result = run_simulation(config) + assert result.success, f"L3Dma test failed with {result.error_count} errors" + assert result.error_count == 0, f"Found {result.error_count} errors" + + +@pytest.mark.dma +@pytest.mark.snitch_dma +@pytest.mark.snitch_tiled +@pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) +@pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) +def test_snitch_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, + skipsim) -> None: + """Test SnitchDma (Snitch L2→L1 DMA transfers).""" + input_shape, tile_shape, node_count, data_type = test_shape + + # Setup paths + test_name = f"testSnitchDma_{param_id_dma(test_shape)}_{param_id_dma(doublebuffer)}" + platform = "Snitch" + gen_dir, _, test_name_clean = get_test_paths(f"test_dma_gen/{test_name}", platform, base_dir = deeploy_test_dir) + + # Generate network + if not skipgen: + # Clean gen_dir to avoid stale state + if os.path.exists(gen_dir): + shutil.rmtree(gen_dir) + deployer, test_inputs, test_outputs = setup_dma_deployer("SnitchDma", input_shape, tile_shape, node_count, + data_type, doublebuffer, gen_dir) + generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) + + # Build and run + worker_id = get_worker_id() + if worker_id == "master": + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / "build_master") + else: + build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / f"build_{worker_id}") + + from testUtils.pytestRunner import DeeployTestConfig + config = DeeployTestConfig( + test_name = test_name_clean, + test_dir = gen_dir, + platform = platform, + simulator = 'gvsoc', + tiling = True, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = toolchain, + toolchain_install_dir = toolchain_dir, + cmake_args = list(cmake_args) + ["NUM_CORES=9"], + ) + + configure_cmake(config) + build_binary(config) + + if not skipsim: + from testUtils.pytestRunner import run_simulation + result = run_simulation(config) + assert result.success, f"SnitchDma test failed with {result.error_count} errors" + assert result.error_count == 0, f"Found {result.error_count} errors" From d73e40da55d6cf964f78b3038abc9327fe3241be Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 8 Jan 2026 14:30:03 +0100 Subject: [PATCH 36/51] Move DMA test to pytest and update CI --- .github/workflows/ci-deeploy-testing.yml | 78 ------------------------ .github/workflows/ci-deeploy.yml | 50 ++++++++++++++- DeeployTest/README.md | 4 +- DeeployTest/conftest.py | 4 -- DeeployTest/test_dmas.py | 71 +++++++++++++++------ 5 files changed, 106 insertions(+), 101 deletions(-) delete mode 100644 .github/workflows/ci-deeploy-testing.yml diff --git a/.github/workflows/ci-deeploy-testing.yml b/.github/workflows/ci-deeploy-testing.yml deleted file mode 100644 index a44d557265..0000000000 --- a/.github/workflows/ci-deeploy-testing.yml +++ /dev/null @@ -1,78 +0,0 @@ -# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna -# -# SPDX-License-Identifier: Apache-2.0 - ---- -name: CI • Deeploy Testing - -"on": - push: - branches: - - "**" - tags: - - "v*.*.*" - 
pull_request: - workflow_dispatch: - inputs: - docker_image_deeploy: - description: "Deeploy Image to use" - required: false - default: "ghcr.io/pulp-platform/deeploy:devel" - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -jobs: - select-env: - uses: ./.github/workflows/_select-env.yml - with: - docker_image_deeploy: ${{ inputs.docker_image_deeploy }} - - generate-network-type-inference: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - strategy: - fail-fast: false - matrix: - include: - - name: fail-input0 - platform: Generic - test: testTypeInferenceDifferentTypes - type_map: "A=int8_t B=int8_t C=int8_t" - offset_map: "A=0 B=0 C=0" - shouldFail: true - - name: fail-input2 - platform: Generic - test: testTypeInferenceDifferentTypes - type_map: "A=int16_t B=int8_t C=int16_t" - offset_map: "A=0 B=0 C=0" - shouldFail: true - - name: pass - platform: Generic - test: testTypeInferenceDifferentTypes - type_map: "A=int16_t B=int8_t C=int32_t" - offset_map: "A=0 B=0 C=0" - shouldFail: false - name: Test Type Inference (${{ matrix.name }}) - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python generateNetwork.py \ - -p ${{ matrix.platform }} \ - -t ./Tests/${{ matrix.test }} \ - -v \ - --input-type-map ${{ matrix.type_map }} \ - --input-offset-map ${{ matrix.offset_map }} \ - ${{ matrix.shouldFail && '--shouldFail' || '' }} diff --git a/.github/workflows/ci-deeploy.yml b/.github/workflows/ci-deeploy.yml index 429e9c2027..5ba16593a2 100644 --- a/.github/workflows/ci-deeploy.yml +++ b/.github/workflows/ci-deeploy.yml @@ -235,4 +235,52 @@ jobs: shell: bash run: | cd DeeployTest - python testDmas.py + pytest test_dmas.py -v -n 4 + + generate-network-type-inference: + needs: select-env + runs-on: ${{ needs.select-env.outputs.runner }} + container: + image: ${{ needs.select-env.outputs.image }} + strategy: + fail-fast: false + matrix: + include: + - name: fail-input0 + platform: Generic + test: testTypeInferenceDifferentTypes + type_map: "A=int8_t B=int8_t C=int8_t" + offset_map: "A=0 B=0 C=0" + shouldFail: true + - name: fail-input2 + platform: Generic + test: testTypeInferenceDifferentTypes + type_map: "A=int16_t B=int8_t C=int16_t" + offset_map: "A=0 B=0 C=0" + shouldFail: true + - name: pass + platform: Generic + test: testTypeInferenceDifferentTypes + type_map: "A=int16_t B=int8_t C=int32_t" + offset_map: "A=0 B=0 C=0" + shouldFail: false + name: Test Type Inference (${{ matrix.name }}) + steps: + - name: Checkout Repo + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Build Deeploy + shell: bash + run: pip install -e . + - name: Run Test + shell: bash + run: | + cd DeeployTest + python generateNetwork.py \ + -p ${{ matrix.platform }} \ + -t ./Tests/${{ matrix.test }} \ + -v \ + --input-type-map ${{ matrix.type_map }} \ + --input-offset-map ${{ matrix.offset_map }} \ + ${{ matrix.shouldFail && '--shouldFail' || '' }} diff --git a/DeeployTest/README.md b/DeeployTest/README.md index 0f94916d48..b5db1988b0 100644 --- a/DeeployTest/README.md +++ b/DeeployTest/README.md @@ -57,4 +57,6 @@ pytest test_platforms.py -m generic -v -n 16 ### Misc -When running `pytest -m ` in a folder, PyTest will scan each file looking for tests. 
To speed up the detection you can specify the platform test file like `pytest test_platforms.py -m `. +- When running `pytest -m ` in a folder, PyTest will scan each file looking for tests. To speed up the detection you can specify the platform test file like `pytest test_platforms.py -m `. +- If you place a breakpoint like `import IPython; IPython.embed()`, you need to run the test with `-s` to be able to enter breakpoints. +- The `--pdb` flag is very useful as it drops a debugger session on failure. \ No newline at end of file diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 15617d0f19..981fc6a7a9 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -68,10 +68,6 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "l3: mark test as L3 default memory level") config.addinivalue_line("markers", "wmem: mark test as using Neureka weight memory") config.addinivalue_line("markers", "dma: mark test as DMA test") - config.addinivalue_line("markers", "mchan_dma: mark test as MchanDma test (Siracusa L2→L1)") - config.addinivalue_line("markers", "l3_dma: mark test as L3Dma test (Siracusa L3→L2)") - config.addinivalue_line("markers", "snitch_dma: mark test as SnitchDma test (Snitch L2→L1)") - config.addinivalue_line("markers", "slow: mark test as slow running") # Configure logging based on verbosity verbosity = config.option.verbose diff --git a/DeeployTest/test_dmas.py b/DeeployTest/test_dmas.py index 396b4a1fc5..2cb598bbf6 100644 --- a/DeeployTest/test_dmas.py +++ b/DeeployTest/test_dmas.py @@ -28,10 +28,12 @@ from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import (ArgumentStructGeneration, MemoryManagementGeneration) from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity +from Deeploy.Targets.PULPOpen.Bindings import L3MemoryAwareFunctionCallClosure from Deeploy.Targets.PULPOpen.Bindings import MemoryAwareFunctionCallClosure as PULPMemoryAwareFunctionCallClosure from Deeploy.Targets.PULPOpen.Bindings import TilingCallClosure as PULPTilingCallClosure from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPClusterTiling import PULPClusterTiling -from Deeploy.Targets.PULPOpen.DMA.L3Dma import L3Dma +from Deeploy.Targets.PULPOpen.CodeTransformationPasses.PULPL3Tiling import PULPL3Tiling +from Deeploy.Targets.PULPOpen.DMA.L3Dma import L3Dma, l3DmaHack from Deeploy.Targets.PULPOpen.DMA.MchanDma import MchanDma from Deeploy.Targets.Snitch.Bindings import MemoryAwareFunctionCallClosure, TilingCallClosure from Deeploy.Targets.Snitch.CodeTransformationPasses import SnitchClusterTiling @@ -43,6 +45,43 @@ TilingVariableReplacement, TilingVariableReplacementUpdate) from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings + +@pytest.fixture(autouse=True) +def clear_deeploy_state(): + """Clear dynamically generated struct classes from AbstractDataTypes before each test. + + This prevents state pollution between DMA tests where dynamically generated + struct classes (like _memcpy_0_tiling_closure_args_t) persist and cause + conflicts when tests with different configurations try to create new versions. + """ + import Deeploy.AbstractDataTypes as ADT + + # Get list of all attributes before test + attrs_to_remove = [] + for attr_name in dir(ADT): + # Remove dynamically generated struct classes (closure args, etc.) 
+ if attr_name.startswith('_') and ('closure_args' in attr_name or 'memcpy' in attr_name.lower()): + attr = getattr(ADT, attr_name, None) + if isinstance(attr, type): + attrs_to_remove.append(attr_name) + + # Remove stale struct classes + for attr_name in attrs_to_remove: + delattr(ADT, attr_name) + + yield # Run the test + + # Clean up after test as well + for attr_name in dir(ADT): + if attr_name.startswith('_') and ('closure_args' in attr_name or 'memcpy' in attr_name.lower()): + attr = getattr(ADT, attr_name, None) + if isinstance(attr, type): + try: + delattr(ADT, attr_name) + except AttributeError: + pass + + # Test shape configurations: (input_shape, tile_shape, node_count, data_type) DMA_TEST_SHAPES = [ ((10, 10), (10, 10), 1, "uint8_t"), @@ -138,7 +177,20 @@ def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, nod MemoryManagementGeneration(defaultMemory), MemoryManagementGeneration(), ]) - else: # MchanDma, L3Dma + elif dma_type == "L3Dma": + # L3Dma uses PULPL3Tiling and L3MemoryAwareFunctionCallClosure + transformer = CodeTransformation([ + TilingVariableReplacement(targetMemory), + PULPTilingCallClosure(writeback = False, generateStruct = True), + TilingVariableReplacementUpdate(targetMemory), + PULPL3Tiling("L3", "L2", l3DmaHack), + ArgumentStructGeneration(), + L3MemoryAwareFunctionCallClosure(writeback = False), + MemoryManagementGeneration("L2"), + MemoryManagementGeneration("L3.*"), + MemoryManagementGeneration(), + ]) + else: # MchanDma transformer = CodeTransformation([ TilingVariableReplacement(targetMemory), PULPTilingCallClosure(writeback = False, generateStruct = True), @@ -173,8 +225,6 @@ def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, nod @pytest.mark.dma -@pytest.mark.mchan_dma -@pytest.mark.siracusa_tiled @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, @@ -189,9 +239,6 @@ def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolch # Generate network if not skipgen: - # Clean gen_dir to avoid stale state - if os.path.exists(gen_dir): - shutil.rmtree(gen_dir) deployer, test_inputs, test_outputs = setup_dma_deployer("MchanDma", input_shape, tile_shape, node_count, data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) @@ -228,8 +275,6 @@ def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolch @pytest.mark.dma -@pytest.mark.l3_dma -@pytest.mark.siracusa_tiled @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, @@ -244,9 +289,6 @@ def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain # Generate network if not skipgen: - # Clean gen_dir to avoid stale state - if os.path.exists(gen_dir): - shutil.rmtree(gen_dir) deployer, test_inputs, test_outputs = setup_dma_deployer("L3Dma", input_shape, tile_shape, node_count, data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) @@ -283,8 +325,6 @@ def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain @pytest.mark.dma -@pytest.mark.snitch_dma 
-@pytest.mark.snitch_tiled @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_snitch_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, @@ -299,9 +339,6 @@ def test_snitch_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolc # Generate network if not skipgen: - # Clean gen_dir to avoid stale state - if os.path.exists(gen_dir): - shutil.rmtree(gen_dir) deployer, test_inputs, test_outputs = setup_dma_deployer("SnitchDma", input_shape, tile_shape, node_count, data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) From 6f4b10758aba231d2930920e7746e2dbe3c149c5 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Thu, 8 Jan 2026 15:42:47 +0100 Subject: [PATCH 37/51] Format and lint --- DeeployTest/test_dmas.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/DeeployTest/test_dmas.py b/DeeployTest/test_dmas.py index 2cb598bbf6..96c72fd919 100644 --- a/DeeployTest/test_dmas.py +++ b/DeeployTest/test_dmas.py @@ -12,21 +12,19 @@ """ import os -import shutil from pathlib import Path import numpy as np import pytest from testUtils.codeGenerate import generateTestNetwork -from testUtils.dmaUtils import (MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, - memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer, - setup_snitch_deployer) +from testUtils.dmaUtils import MemcpyLayer, MemcpyParser, MemcpyTileConstraint, MemcpyTypeChecker, generate_graph, \ + memcpyTemplate, prepare_deployer_with_custom_tiling, setup_pulp_deployer, setup_snitch_deployer from testUtils.pytestRunner import build_binary, configure_cmake, get_test_paths, get_worker_id from testUtils.typeMapping import baseTypeFromName, dtypeFromDeeployType from Deeploy.AbstractDataTypes import PointerClass -from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import (ArgumentStructGeneration, - MemoryManagementGeneration) +from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \ + MemoryManagementGeneration from Deeploy.DeeployTypes import CodeTransformation, NodeBinding, NodeMapper, _NoVerbosity from Deeploy.Targets.PULPOpen.Bindings import L3MemoryAwareFunctionCallClosure from Deeploy.Targets.PULPOpen.Bindings import MemoryAwareFunctionCallClosure as PULPMemoryAwareFunctionCallClosure @@ -41,12 +39,12 @@ from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchCoreFilter import SnitchCoreFilterPass from Deeploy.Targets.Snitch.CodeTransformationPasses.SnitchProfileExecutionBlock import SnitchProfileExecutionBlockPass from Deeploy.Targets.Snitch.DMA.SnitchDma import SnitchDma -from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import ( - TilingVariableReplacement, TilingVariableReplacementUpdate) +from Deeploy.TilingExtension.CodeTransformationPasses.TilingVariableReplacement import TilingVariableReplacement, \ + TilingVariableReplacementUpdate from Deeploy.TilingExtension.TilerExtension import TilingReadyNodeBindings -@pytest.fixture(autouse=True) +@pytest.fixture(autouse = True) def clear_deeploy_state(): """Clear dynamically generated struct classes from AbstractDataTypes before each test. @@ -55,7 +53,7 @@ def clear_deeploy_state(): conflicts when tests with different configurations try to create new versions. 
""" import Deeploy.AbstractDataTypes as ADT - + # Get list of all attributes before test attrs_to_remove = [] for attr_name in dir(ADT): @@ -64,13 +62,13 @@ def clear_deeploy_state(): attr = getattr(ADT, attr_name, None) if isinstance(attr, type): attrs_to_remove.append(attr_name) - + # Remove stale struct classes for attr_name in attrs_to_remove: delattr(ADT, attr_name) - + yield # Run the test - + # Clean up after test as well for attr_name in dir(ADT): if attr_name.startswith('_') and ('closure_args' in attr_name or 'memcpy' in attr_name.lower()): @@ -110,7 +108,7 @@ def param_id_dma(val): def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, node_count: int, data_type: str, - doublebuffer: bool, gen_dir: str): + doublebuffer: bool, gen_dir: str): """ Set up deployer for DMA testing with custom tiling. @@ -155,7 +153,7 @@ def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, nod graph = generate_graph(node_count, input_shape, dtype) inputTypes = {"input_0": PointerClass(_type)} _DEEPLOYSTATEDIR = os.path.join(gen_dir, "deeployStates") - + if dma_type == "SnitchDma": deployer = setup_snitch_deployer(defaultMemory, targetMemory, graph, inputTypes, doublebuffer, _DEEPLOYSTATEDIR) else: @@ -240,7 +238,7 @@ def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolch # Generate network if not skipgen: deployer, test_inputs, test_outputs = setup_dma_deployer("MchanDma", input_shape, tile_shape, node_count, - data_type, doublebuffer, gen_dir) + data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) # Build and run @@ -290,7 +288,7 @@ def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain # Generate network if not skipgen: deployer, test_inputs, test_outputs = setup_dma_deployer("L3Dma", input_shape, tile_shape, node_count, - data_type, doublebuffer, gen_dir) + data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) # Build and run @@ -340,7 +338,7 @@ def test_snitch_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolc # Generate network if not skipgen: deployer, test_inputs, test_outputs = setup_dma_deployer("SnitchDma", input_shape, tile_shape, node_count, - data_type, doublebuffer, gen_dir) + data_type, doublebuffer, gen_dir) generateTestNetwork(deployer, [test_inputs], [test_outputs], gen_dir, _NoVerbosity) # Build and run From 856c9dac29b2f34345c71de10a909a5fb9ec5b38 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 9 Jan 2026 14:53:03 +0100 Subject: [PATCH 38/51] Adapt the pytest suite and CI for the new test structure --- .github/workflows/ci-platform-chimera.yml | 4 - .github/workflows/ci-platform-cortexm.yml | 16 +- .github/workflows/ci-platform-generic.yml | 102 +------- .github/workflows/ci-platform-mempool.yml | 40 --- .../ci-platform-siracusa-neureka-tiled.yml | 69 ----- .../workflows/ci-platform-siracusa-tiled.yml | 235 ------------------ .github/workflows/ci-platform-siracusa.yml | 92 ------- .../workflows/ci-platform-snitch-tiled.yml | 17 -- .github/workflows/ci-platform-snitch.yml | 17 -- .github/workflows/ci-platform-softhier.yml | 3 +- .github/workflows/infra-generate-ccache.yml | 28 +-- DeeployTest/test_chimera_config.py | 2 +- DeeployTest/test_cortexm_config.py | 26 +- DeeployTest/test_generic_config.py | 137 +++++----- DeeployTest/test_mempool_config.py | 49 ++-- DeeployTest/test_siracusa_config.py | 115 +++++---- 
.../test_siracusa_neureka_tiled_config.py | 44 ++-- DeeployTest/test_siracusa_tiled_config.py | 224 ++++++++++------- DeeployTest/test_snitch_config.py | 20 +- DeeployTest/test_snitch_tiled_config.py | 16 +- DeeployTest/test_softhier_config.py | 2 +- 21 files changed, 366 insertions(+), 892 deletions(-) diff --git a/.github/workflows/ci-platform-chimera.yml b/.github/workflows/ci-platform-chimera.yml index 73f1cf46e1..aad065ae78 100644 --- a/.github/workflows/ci-platform-chimera.yml +++ b/.github/workflows/ci-platform-chimera.yml @@ -36,7 +36,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # test-names: | - # Kernels/Integer/Add/Regular - # simulators: | - # gvsoc diff --git a/.github/workflows/ci-platform-cortexm.yml b/.github/workflows/ci-platform-cortexm.yml index 84a064c260..92cc04af9f 100644 --- a/.github/workflows/ci-platform-cortexm.yml +++ b/.github/workflows/ci-platform-cortexm.yml @@ -36,18 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # test-names: | - # Kernels/Integer/Add/Regular - # Kernels/Integer/Add/MultIO - # Kernels/Integer/Pad/Regular_1D - # Kernels/Integer/Pad/Regular_2D - # Kernels/Integer/MatMul/Regular - # Kernels/Integer/MatMul/Add - # Kernels/Integer/MaxPool - # Kernels/Integer/Conv/Regular_2D_RQ - # Kernels/Integer/ReduceSum - # Kernels/Integer/ReduceMean - # Kernels/Integer/Slice cortexm-models: needs: select-env @@ -56,6 +44,4 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models" - # test-names: | - # Models/CNN_Linear2 - # Models/WaveFormer + diff --git a/.github/workflows/ci-platform-generic.yml b/.github/workflows/ci-platform-generic.yml index fe7fd7ad34..34944fa39a 100644 --- a/.github/workflows/ci-platform-generic.yml +++ b/.github/workflows/ci-platform-generic.yml @@ -36,89 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # test-names: | - # Kernels/FP32/ReLU - # Kernels/FP32/Softmax/Regular - - # Kernels/FP32/Add/Regular - - # Kernels/FP32/Conv/DW_2D_Bias - # Kernels/FP32/Conv/DW_2D_NoBias - # Kernels/FP32/Conv/DW_2D_ZeroValuedBias - - # Kernels/FP32/Conv/Regular_2D_Bias - # Kernels/FP32/Conv/Regular_2D_NoBias - # Kernels/FP32/Conv/Regular_2D_ZeroValuedBias - - # Kernels/FP32/Div - # Kernels/FP32/GEMM/Regular - # Kernels/FP32/MatMul - # Kernels/FP32/MaxPool - # Kernels/FP32/Mul - - # Kernels/FP32/LayerNorm - # Kernels/FP32/RMSNorm - - # Kernels/FP32/Pow/Scalar - # Kernels/FP32/Pow/Vector - - # Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean - # Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add - # Kernels/FP32/ReduceMean/KeepDims/AllAxes - # Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3 - # Kernels/FP32/ReduceMean/KeepDims/Axes1_3 - # Kernels/FP32/ReduceMean/KeepDims/Axes2_1 - # Kernels/FP32/ReduceMean/KeepDims/Axis0 - # Kernels/FP32/ReduceMean/KeepDims/Axis2 - # Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add - - # Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean - # Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add - # Kernels/FP32/ReduceMean/NoKeepDims/AllAxes - # Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3 - # Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3 - # Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1 - # Kernels/FP32/ReduceMean/NoKeepDims/Axis0 - # Kernels/FP32/ReduceMean/NoKeepDims/Axis2 - # 
Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add - - # Kernels/FP32/Reshape/SkipConnection - # Kernels/FP32/Sqrt - # Kernels/FP32/Transpose - - # Kernels/Integer/Softmax/Regular - - # Kernels/Integer/Add/MultIO - # Kernels/Integer/Add/Regular - - # Kernels/Integer/Conv/DW_1D - # Kernels/Integer/Conv/Regular_1D - - # Kernels/Integer/Conv/DW_2D - # Kernels/Integer/Conv/Regular_2D - - # Kernels/Integer/GEMM/Regular - - # Kernels/Integer/MatMul/Add - # Kernels/Integer/MatMul/Regular - - # Kernels/Integer/MaxPool - - # Kernels/Integer/Pad/Regular_1D - # Kernels/Integer/Pad/Regular_2D - - # Kernels/Integer/ReduceMean - # Kernels/Integer/ReduceSum - # Kernels/Integer/Slice - - # Models/TinyViT/5M/Layers/FP32/ReduceMean - - # Kernels/Mixed/Dequant - # Kernels/Mixed/Quant - # Models/Transformer_DeepQuant - # Kernels/Integer/Conv/DW_2D_RQ - # Kernels/Integer/Conv/Regular_2D_RQ - # Kernels/Integer/MatMul/Regular_RQ generic-models: needs: select-env @@ -127,21 +44,4 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models" - # test-names: | - # Models/Autoencoder1D - - # Models/CCT/FP32/CCT_1_16_16_8 - # Models/CCT/FP32/CCT_2_32_32_128_Opset20 - # Models/CCT/Int/ICCT - # Models/CCT/Int/ICCT_8 - # Models/CCT/Int/ICCT_ITA - # Models/CCT/Int/ICCT_ITA_8 - - # Models/miniMobileNet - # Models/miniMobileNetv2 - - # Models/CNN_Linear1 - # Models/TinyViT/Demo - # Models/WaveFormer - - # Models/CNN_Linear2 + \ No newline at end of file diff --git a/.github/workflows/ci-platform-mempool.yml b/.github/workflows/ci-platform-mempool.yml index f75b1c33fc..efda508257 100644 --- a/.github/workflows/ci-platform-mempool.yml +++ b/.github/workflows/ci-platform-mempool.yml @@ -36,35 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # test-names: | - # Kernels/Integer/Add/MultIO - # Kernels/Integer/Add/Regular - - # Kernels/Integer/Conv/DW_1D - # Kernels/Integer/Conv/Regular_1D - - # Kernels/Integer/Conv/DW_2D - # Kernels/Integer/Conv/Regular_2D - - # Kernels/Integer/GEMM/Regular - - # Kernels/Integer/MatMul/Add - # Kernels/Integer/MatMul/Regular - - # Kernels/Integer/MaxPool - - # Kernels/Integer/Pad/Regular_1D - # Kernels/Integer/Pad/Regular_2D - - # Kernels/Integer/ReduceMean - # Kernels/Integer/ReduceSum - - # Kernels/Integer/Slice - - # Kernels/Integer/Conv/Regular_2D_RQ - # Kernels/Integer/Conv/DW_2D_RQ - # Kernels/Integer/GEMM/Regular_RQPerRow - # Kernels/Integer/MatMul/Regular_RQ mempool-models: needs: select-env @@ -73,14 +44,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models" - # test-names: | - # Models/CCT/Int/ICCT - # Models/CCT/Int/ICCT_8 - # Models/CCT/Int/ICCT_ITA - - # Models/miniMobileNet - # Models/miniMobileNetv2 - - # Models/CNN_Linear1 - - # Models/CNN_Linear2 diff --git a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml index ec00066cfa..e76ee648c0 100644 --- a/.github/workflows/ci-platform-siracusa-neureka-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-neureka-tiled.yml @@ -36,14 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels and singlebuffer and l2 and not wmem" - # tests-config: | - # [ - # {"name":"Kernels/Integer/GEMM/Regular_RQPerColumn","L1":[16000]}, - # 
{"name":"Kernels/Integer/Conv/PW_2D","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ","L1":[32000]} - # ] - # num-cores: 8 siracusa-neureka-kernels-tiled-doublebuffer-L2: needs: select-env @@ -55,29 +47,6 @@ jobs: siracusa-neureka-models-tiled-singlebuffer-L3: needs: select-env - # tests-config: | - # [ - # {"name":"Kernels/Integer/GEMM/Regular_RQPerColumn","L1":[16000]}, - # {"name":"Kernels/Integer/Conv/PW_2D","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ","L1":[32000]} - # ] - # num-cores: 8 - # # double buffer enabled: - # double-buffer: true - - # siracusa-neureka-models-tiled-singlebuffer-L3: - # needs: select-env - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN: 1000 leads to non-2d transfers in L3! - # - { name: "Kernels/Integer/Attention", L1: [2500] } - # - { name: "Models/Transformer", L1: [15000] } - # - { name: "Models/microLlama/microLlama1", L1: [10000] } - # num-cores: [8] - # default-memory-level: ["L3"] uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} @@ -86,16 +55,6 @@ jobs: siracusa-neureka-models-tiled-doublebuffer-L3: needs: select-env - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN note - # - { name: "Kernels/Integer/Attention", L1: [5000] } - # - { name: "Models/Transformer", L1: [30000] } - # num-cores: [8] - # double-buffer: [true] - # default-memory-level: ["L3"] uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} @@ -104,34 +63,6 @@ jobs: siracusa-neureka-kernels-tiled-singlebuffer-L2-wmem: needs: select-env - # uses: ./.github/workflows/_runner-siracusa-neureka-tiled-sequential.yml - # with: - # runner: ${{ needs.select-env.outputs.runner }} - # docker-image: ${{ needs.select-env.outputs.image }} - # tests-config: | - # [ - # {"name":"Kernels/Integer/GEMM/Regular_RQPerColumn","L1":[16000]}, - # {"name":"Kernels/Integer/Conv/PW_2D","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ","L1":[32000]}, - # {"name":"Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ","L1":[32000]} - # ] - # num-cores: 8 - # neureka-wmem: true - - # siracusa-neureka-models-tiled-doublebuffer-L3-wmem: - # needs: select-env - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - { name: "Models/miniMobileNet", L1: [2000] } # LMACAN note - # - { name: "Kernels/Integer/Attention", L1: [3500] } - # # - { name: "Models/Transformer", L1: [30000] } - # - { name: "Models/microLlama/microLlama1", L1: [10000] } - # num-cores: [8] - # double-buffer: [true] - # default-memory-level: ["L3"] - # neureka-wmem: [true] uses: ./.github/workflows/_runner-siracusa-neureka-tiled.yml with: runner: ${{ needs.select-env.outputs.runner }} diff --git a/.github/workflows/ci-platform-siracusa-tiled.yml b/.github/workflows/ci-platform-siracusa-tiled.yml index 7ed36be4e8..6597f3e625 100644 --- a/.github/workflows/ci-platform-siracusa-tiled.yml +++ b/.github/workflows/ci-platform-siracusa-tiled.yml @@ -37,171 +37,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels and l2 and singlebuffer" - # uses: ./.github/workflows/_runner-siracusa-tiled-sequential.yml - # with: 
- # runner: ${{ needs.select-env.outputs.runner }} - # docker-image: ${{ needs.select-env.outputs.image }} - # tests-config: | - # [ - # {"name":"Kernels/FP32/ReLU","L1":[2000]}, - # {"name":"Kernels/FP32/Softmax/Regular","L1":[4000]}, - - # {"name":"Kernels/FP32/Add/Large","L1":[220000]}, - - # {"name":"Kernels/FP32/Conv/DW_2D_Bias","L1":[7200]}, - # {"name":"Kernels/FP32/Conv/DW_2D_NoBias","L1":[7200]}, - # {"name":"Kernels/FP32/Conv/DW_2D_ZeroValuedBias","L1":[7200]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_Bias","L1":[6600]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_NoBias","L1":[1600]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_ZeroValuedBias","L1":[6600]}, - - # {"name":"Kernels/FP32/GEMM/Regular","L1":[8000]}, - # {"name":"Kernels/FP32/MatMul","L1":[2000]}, - # {"name":"Kernels/FP32/MaxPool","L1":[2000]}, - # {"name":"Kernels/FP32/Mul","L1":[2000]}, - # {"name":"Kernels/FP32/LayerNorm","L1":[2000]}, - - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/AllAxes","L1":[50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3","L1":[50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes1_3","L1":[5000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes2_1","L1":[6200,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axis0","L1":[8400,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axis2","L1":[8400,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add","L1":[8000]}, - - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/AllAxes","L1":[50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3","L1":[50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3","L1":[5000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1","L1":[6200,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axis0","L1":[8400,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axis2","L1":[8400,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add","L1":[8000]}, - - # {"name":"Kernels/FP32/Reshape/SkipConnection","L1":[1400]}, - # {"name":"Kernels/FP32/Transpose","L1":[2000]}, - - # {"name":"Kernels/Integer/Hardswish/Regular","L1":[750]}, - # {"name":"Kernels/Integer/Softmax/Regular","L1":[800,500,300]}, - - # {"name":"Kernels/Integer/Concat","L1":[32000,16000,8000]}, - - # {"name":"Kernels/Integer/MatMul/Batch","L1":[20000]}, - # {"name":"Kernels/Integer/MatMul/Regular","L1":[64000,32000,16000]}, - - # {"name":"Kernels/Integer/RMSNorm","L1":[2048,1024,512]}, - - # {"name":"Kernels/Integer/Conv/Regular_2D_RQ","L1":[8000,6000,4000]}, - # {"name":"Kernels/Integer/Conv/DW_2D_RQ","L1":[2561]}, - # {"name":"Kernels/Integer/Conv/StriddedPadded_2D_RQ","L1":[600]}, - # {"name":"Kernels/Integer/GEMM/Batch_RQ","L1":[20000]}, - # {"name":"Kernels/Integer/Hardswish/Regular_RQ","L1":[750]} - # ] - # num-cores: 8 - - # siracusa-kernels-tiled-doublebuffer-L2: - # needs: select-env - # uses: ./.github/workflows/_runner-siracusa-tiled-sequential.yml - # with: - # runner: ${{ needs.select-env.outputs.runner }} - # docker-image: ${{ needs.select-env.outputs.image }} - # tests-config: | - # [ - # {"name":"Kernels/FP32/ReLU","L1":[20]}, - # {"name":"Kernels/FP32/Softmax/Regular","L1":[8000]}, - - # 
{"name":"Kernels/FP32/Conv/DW_2D_Bias","L1":[10000]}, - # {"name":"Kernels/FP32/Conv/DW_2D_NoBias","L1":[9800]}, - # {"name":"Kernels/FP32/Conv/DW_2D_ZeroValuedBias","L1":[9800]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_Bias","L1":[8800]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_NoBias","L1":[2000]}, - # {"name":"Kernels/FP32/Conv/Regular_2D_ZeroValuedBias","L1":[8800]}, - - # {"name":"Kernels/FP32/GEMM/Regular","L1":[8000]}, - # {"name":"Kernels/FP32/MatMul","L1":[5000]}, - # {"name":"Kernels/FP32/MaxPool","L1":[5000]}, - # {"name":"Kernels/FP32/Mul","L1":[2000]}, - # {"name":"Kernels/FP32/LayerNorm","L1":[2000]}, - - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/AllAxes","L1":[100000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3","L1":[100000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes1_3","L1":[10000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axes2_1","L1":[13000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axis0","L1":[17000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/Axis2","L1":[17000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add","L1":[8000]}, - - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add","L1":[8000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/AllAxes","L1":[100000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3","L1":[100000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3","L1":[10000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1","L1":[13000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axis0","L1":[17000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/Axis2","L1":[17000,50000]}, - # {"name":"Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add","L1":[8000]}, - - # {"name":"Kernels/FP32/Reshape/SkipConnection","L1":[2600]}, - # {"name":"Kernels/FP32/Transpose","L1":[2000]}, - - # {"name":"Kernels/Integer/Hardswish/Regular","L1":[750]}, - # {"name":"Kernels/Integer/Softmax/Regular","L1":[1600,1000,600]}, - - # {"name":"Kernels/Integer/Concat","L1":[64000,32000,16000]}, - # {"name":"Kernels/Integer/MatMul/Regular","L1":[64000,32000,16000]}, - # {"name":"Kernels/Integer/RMSNorm","L1":[4096,2048,1024]}, - - # {"name":"Kernels/Integer/Conv/Regular_2D_RQ","L1":[8000,6000,5000]}, - # {"name":"Kernels/Integer/Conv/DW_2D_RQ","L1":[5121]}, - # {"name":"Kernels/Integer/Hardswish/Regular_RQ","L1":[800]} - # ] - # num-cores: 8 - # double-buffer: true - - # siracusa-models-tiled-singlebuffer-L2: - # needs: select-env - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - name: "Kernels/Integer/Attention" - # L1: [60000, 10000, 5000] - - # - name: "Models/CCT/FP32/CCT_1_16_16_8" - # L1: [64000] - - # - name: "Models/microLlama/microLlama1" - # L1: [60000, 10000, 5000] - # - name: "Models/microLlama/microLlama8" - # L1: [60000, 10000, 5000] - # - name: "Models/microLlama/microLlama8_parallel" - # L1: [60000, 10000, 5000] - - # - name: "Models/miniMobileNet" - # L1: [60000, 12000, 6000, 3000] - # - name: "Models/miniMobileNetv2" - # L1: [60000, 16000, 12000, 8000] - - # - name: "Models/MLPerf/AnomalyDetection" - # L1: [64000] - # - name: "Models/MLPerf/ImageClassification" - # L1: [64000] - # - name: "Models/MLPerf/KeywordSpotting" - # L1: [64000] - - # - name: 
"Models/TinyViT/5M/Layers/FP32/ReduceMean" - # L1: [200, 40000] - # - name: "Models/TinyViT/Demo" - # L1: [4000] - - # - name: "Models/CNN_Linear2" - # L1: [45000, 30000, 15000] - # num-cores: [8] # Kernel tests - L2 doublebuffer siracusa-kernels-tiled-l2-doublebuffer: @@ -211,36 +46,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels and l2 and doublebuffer" - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - name: "Kernels/Integer/Attention" - # L1: [60000, 10000, 5000, 2500] - - # - name: "Models/CCT/FP32/CCT_2_32_32_128" - # L1: [128000] - - # - name: "Models/microLlama/microLlama1" - # L1: [60000, 10000, 5000] - - # - name: "Models/miniMobileNet" - # L1: [60000, 12000, 6000] # SCHEREMO note - # - name: "Models/miniMobileNetv2" - # L1: [60000, 16000, 12000, 8000] - # - name: "Models/TinyViT/5M/Layers/FP32/ReduceMean" - # L1: [200, 40000] - # - name: "Models/TinyViT/Demo" - # L1: [4000] - - # - name: "Models/CNN_Linear2" - # L1: [45000, 30000, 16000] # SCHEREMO note - # - name: "Models/CCT_Train/CCT2_FT2" - # L1: [128000] - # - name: "Models/Transformer" - # L1: [60000, 30000, 15000] - # num-cores: [8] - # default-memory-level: ["L3"] # Model tests - L2 singlebuffer siracusa-models-tiled-l2-singlebuffer: @@ -277,43 +82,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models and l3 and doublebuffer" - - - # siracusa-models-tiled-doublebuffer-L3: - # needs: select-env - # strategy: - # fail-fast: false - # matrix: - # test-data: - # - name: "Kernels/Integer/Attention" - # L1: [60000, 20000, 10000, 5000] - - # - name: "Models/CCT/FP32/CCT_2_32_32_128" - # L1: [128000] - - # - name: "Models/microLlama/microLlama1" - # L1: [60000, 20000, 10000] - # - name: "Models/microLlama/microLlama8" - # L1: [60000, 20000, 10000] - # - name: "Models/microLlama/microLlama8_parallel" - # L1: [60000, 20000, 10000] - - # - name: "Models/miniMobileNet" - # L1: [60000, 24000, 12000, 6000] - # - name: "Models/miniMobileNetv2" - # L1: [60000, 32000, 24000, 16000] - - # - name: "Models/TinyViT/5M/Layers/FP32/ReduceMean" - # L1: [200, 40000] - # - name: "Models/TinyViT/Demo" - # L1: [4000] - - # - name: "Models/CNN_Linear2" - # L1: [60000, 45000, 30000] - # - name: "Models/CCT_Train/CCT2_FT2" - # L1: [128000] - # - name: "Models/Transformer" - # L1: [60000, 30000, 15000] - # num-cores: [8] - # double-buffer: [true] - # default-memory-level: ["L3"] \ No newline at end of file diff --git a/.github/workflows/ci-platform-siracusa.yml b/.github/workflows/ci-platform-siracusa.yml index ef518fbeb7..9cec1ef896 100644 --- a/.github/workflows/ci-platform-siracusa.yml +++ b/.github/workflows/ci-platform-siracusa.yml @@ -36,81 +36,6 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-type: kernels - # test-names: | - # Kernels/FP32/ReLU - - # Kernels/FP32/Softmax/CrossEntropy - # Kernels/FP32/Softmax/CrossEntropyGrad - # Kernels/FP32/Softmax/Grad - # Kernels/FP32/Softmax/Regular - - # Kernels/FP32/Add/Regular - - # Kernels/FP32/Conv/DW_2D_Bias - # Kernels/FP32/Conv/DW_2D_NoBias - # Kernels/FP32/Conv/DW_2D_ZeroValuedBias - # Kernels/FP32/Conv/Regular_2D_Bias - # Kernels/FP32/Conv/Regular_2D_NoBias - # Kernels/FP32/Conv/Regular_2D_ZeroValuedBias - - # Kernels/FP32/GEMM/Regular - # Kernels/FP32/MatMul - # Kernels/FP32/MaxPool - # Kernels/FP32/Mul - # Kernels/FP32/LayerNorm - - # 
Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean - # Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add - # Kernels/FP32/ReduceMean/KeepDims/AllAxes - # Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3 - # Kernels/FP32/ReduceMean/KeepDims/Axes1_3 - # Kernels/FP32/ReduceMean/KeepDims/Axes2_1 - # Kernels/FP32/ReduceMean/KeepDims/Axis0 - # Kernels/FP32/ReduceMean/KeepDims/Axis2 - # Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add - - # Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean - # Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add - # Kernels/FP32/ReduceMean/NoKeepDims/AllAxes - # Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3 - # Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3 - # Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1 - # Kernels/FP32/ReduceMean/NoKeepDims/Axis0 - # Kernels/FP32/ReduceMean/NoKeepDims/Axis2 - # Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add - - # Kernels/FP32/ReduceSum - # Kernels/FP32/Reshape/SkipConnection - - # Kernels/FP32/Transpose - - # Kernels/Integer/Hardswish/Regular - # Kernels/Integer/Softmax/Regular - - # Kernels/Integer/Add/MultIO - # Kernels/Integer/Add/Regular - - # Kernels/Integer/Concat - - # Kernels/Integer/MatMul/Add - # Kernels/Integer/MatMul/Regular - - # Kernels/Integer/Pad/Regular_1D - # Kernels/Integer/Pad/Regular_2D - - # Kernels/Integer/RMSNorm - - # Models/TinyViT/5M/Layers/FP32/ReduceMean - - # Others/Backtracking - # Kernels/Mixed/Dequant - # Kernels/Mixed/Quant - # Models/Transformer_DeepQuant - # Kernels/Integer/Conv/Regular_2D_RQ - # Kernels/Integer/Conv/DW_2D_RQ - # Kernels/Integer/Hardswish/Regular_RQ - # Kernels/Integer/TrueIntegerDiv - # num-cores: 8 siracusa-models: needs: select-env @@ -119,20 +44,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} test-type: models - # test-names: | - # Kernels/Integer/Attention - - # Models/CCT/FP32/CCT_1_16_16_8 - # Models/CCT/FP32/CCT_2_32_32_128_Opset20 - - # Models/miniMobileNet - # Models/miniMobileNetv2 - - # Models/MLPerf/KeywordSpotting - # Models/MLPerf/ImageClassification - # Models/MLPerf/AnomalyDetection - - # Models/TinyViT/Demo - - # Models/CNN_Linear2 - # num-cores: 8 diff --git a/.github/workflows/ci-platform-snitch-tiled.yml b/.github/workflows/ci-platform-snitch-tiled.yml index c1117b3d77..5390d8ad16 100644 --- a/.github/workflows/ci-platform-snitch-tiled.yml +++ b/.github/workflows/ci-platform-snitch-tiled.yml @@ -36,20 +36,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels and singlebuffer and l2" - - # tests-config: | - # [ - # {"name":"Kernels/Integer/Add/Large","L1":[5000,10000]}, - # {"name":"Kernels/Integer/Softmax/Large","L1":[5000,10000]}, - - # {"name":"Kernels/FP32/Softmax/Regular","L1":[2000,5000,10000]}, - - # {"name":"Kernels/FP32/GEMM/Regular","L1":[2000,5000,10000]}, - # {"name":"Kernels/FP32/GEMM/TransB","L1":[2000,5000,10000]}, - - # {"name":"Kernels/Integer/iNoNorm","L1":[5000,10000]}, - # {"name":"Kernels/Integer/Add/Regular_RQ","L1":[5000,10000]}, - # {"name":"Kernels/Integer/GEMM/Regular_RQPerRow","L1":[2000,5000]} - # ] - # simulators: | - # gvsoc diff --git a/.github/workflows/ci-platform-snitch.yml b/.github/workflows/ci-platform-snitch.yml index ee2bca378c..5b1a1d5727 100644 --- a/.github/workflows/ci-platform-snitch.yml +++ b/.github/workflows/ci-platform-snitch.yml @@ -36,21 +36,4 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # 
test-names: | - # Kernels/FP32/Softmax/Regular - # Kernels/Integer/Add/Large - # Kernels/Integer/Add/Regular - - # Kernels/Integer/Softmax/Large - # Kernels/Integer/Softmax/Regular - - # Kernels/Integer/MatMul/Regular - - # Kernels/Integer/iNoNorm - # Kernels/Integer/GEMM/Regular_RQPerRow - # Kernels/Integer/Add/Regular_RQ - # Kernels/Integer/GEMM/TransB_RQ - # num-cores: 9 - # simulators: | - # gvsoc diff --git a/.github/workflows/ci-platform-softhier.yml b/.github/workflows/ci-platform-softhier.yml index 3bccb72d78..e1c534ec4c 100644 --- a/.github/workflows/ci-platform-softhier.yml +++ b/.github/workflows/ci-platform-softhier.yml @@ -36,5 +36,4 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - # test-names: | - # Kernels/Integer/Add/Regular + diff --git a/.github/workflows/infra-generate-ccache.yml b/.github/workflows/infra-generate-ccache.yml index 0c0abfce97..ae2d370bc0 100644 --- a/.github/workflows/infra-generate-ccache.yml +++ b/.github/workflows/infra-generate-ccache.yml @@ -30,30 +30,20 @@ jobs: shell: bash run: pip install -e . - # python testRunner_generic.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_mempool.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_cortexm.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_snitch.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_tiled_snitch.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_tiled_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_tiled_siracusa_w_neureka.py -t ./Tests/Kernels/Integer/Add/Regular - # python testRunner_chimera.py -t ./Tests/Kernels/Integer/Add/Regular - - name: Generate CCache run: | cd DeeployTest mkdir -p /app/.ccache export CCACHE_DIR=/app/.ccache - pytest 'test_platforms.py::test_generic_kernels[Adder]' --skipsim - pytest 'test_platforms.py::test_mempool_kernels[Adder]' --skipsim - pytest 'test_platforms.py::test_cortexm_kernels[Adder]' --skipsim - pytest 'test_platforms.py::test_snitch_kernels[Adder]' --skipsim - pytest 'test_platforms.py::test_snitch_tiled_kernels[Adder-128000-L2]' --skipsim - pytest 'test_platforms.py::test_siracusa_kernels[Adder]' --skipsim - pytest 'test_platforms.py::test_siracusa_tiled_kernels_singlebuffer_l2[Adder-64000-L2-singlebuffer]' --skipsim - pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]' --skipsim - pytest 'test_platforms.py::test_chimera_kernels[Adder]' --skipsim + pytest 'test_platforms.py::test_generic_kernels[Kernels/Integer/Add/Regular]' --skipsim + pytest 'test_platforms.py::test_mempool_kernels[Kernels/Integer/Add/Regular]' --skipsim + pytest 'test_platforms.py::test_cortexm_kernels[Kernels/Integer/Add/Regular]' --skipsim + pytest 'test_platforms.py::test_snitch_kernels[Kernels/Integer/Add/Regular]' --skipsim + pytest 'test_platforms.py::test_snitch_tiled_kernels_l2_singlebuffer[Kernels/Integer/Add/Large-5000-L2-singlebuffer]' --skipsim + pytest 'test_platforms.py::test_siracusa_kernels[Kernels/Integer/Add/Regular]' --skipsim + pytest 'test_platforms.py::test_siracusa_tiled_kernels_l2_singlebuffer[Kernels/Integer/MatMul/Regular-64000-L2-singlebuffer]' --skipsim + pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[Kernels/Integer/GEMM/Regular_RQPerColumn-16000-L2-singlebuffer]' --skipsim + pytest 
'test_platforms.py::test_chimera_kernels[Kernels/Integer/Add/Regular]' --skipsim - name: Clean and Upload CCache uses: actions/cache@v4 diff --git a/DeeployTest/test_chimera_config.py b/DeeployTest/test_chimera_config.py index 1896367e89..bcc846cb75 100644 --- a/DeeployTest/test_chimera_config.py +++ b/DeeployTest/test_chimera_config.py @@ -7,7 +7,7 @@ # Currently only Adder test is in CI KERNEL_TESTS = [ - "Adder", + "Kernels/Integer/Add/Regular", ] MODEL_TESTS = [] diff --git a/DeeployTest/test_cortexm_config.py b/DeeployTest/test_cortexm_config.py index 1de427bcf8..dbbd2e4758 100644 --- a/DeeployTest/test_cortexm_config.py +++ b/DeeployTest/test_cortexm_config.py @@ -4,20 +4,20 @@ """Test configuration for Cortex-M (QEMU-ARM) platform.""" KERNEL_TESTS = [ - "Adder", - "MultIO", - "test1DPad", - "test2DPad", - "testMatMul", - "testMatMulAdd", - "testMaxPool", - "testRQConv", - "testReduceSum", - "testReduceMean", - "testSlice", + "Kernels/Integer/Add/Regular", + "Kernels/Integer/Add/MultIO", + "Kernels/Integer/Pad/Regular_1D", + "Kernels/Integer/Pad/Regular_2D", + "Kernels/Integer/MatMul/Regular", + "Kernels/Integer/MatMul/Add", + "Kernels/Integer/MaxPool", + "Kernels/Integer/Conv/Regular_2D_RQ", + "Kernels/Integer/ReduceSum", + "Kernels/Integer/ReduceMean", + "Kernels/Integer/Slice", ] MODEL_TESTS = [ - "simpleRegression", - "WaveFormer", + "Models/CNN_Linear2", + "Models/WaveFormer", ] diff --git a/DeeployTest/test_generic_config.py b/DeeployTest/test_generic_config.py index c99e961c6e..050b8ae0ba 100644 --- a/DeeployTest/test_generic_config.py +++ b/DeeployTest/test_generic_config.py @@ -4,64 +4,87 @@ """Test configuration for Generic platform.""" KERNEL_TESTS = [ - "Adder", - "MultIO", - "test1DConvolution", - "test2DConvolution", - "test1DDWConvolution", - "test2DDWConvolution", - "test1DPad", - "test2DPad", - "testGEMM", - "testMatMul", - "testMatMulAdd", - "testMaxPool", - "testRQConv", - "testRQMatMul", - "testReduceSum", - "testReduceMean", - "testSlice", - "testRequantizedDWConv", - "test2DRequantizedConv", - "iSoftmax", - "testFloatAdder", - "testFloatGEMM", - "testFloat2DConvolution", - "testFloat2DConvolutionBias", - "testFloat2DConvolutionZeroBias", - "testFloatLayerNorm", - "testFloatDiv", - "testFloat2DDWConvolution", - "testFloat2DDWConvolutionBias", - "testFloat2DDWConvolutionZeroBias", - "testFloatRelu", - "testFloatMaxPool", - "testFloatMatmul", - "testFloatReshapeWithSkipConnection", - "testFloatSoftmax", - "testFloatTranspose", - "testFloatMul", - "testFloatPowScalar", - "testFloatPowVector", - "testFloatSqrt", - "testFloatRMSNorm", - "Quant", - "Dequant", - "QuantizedLinear", + # FP32 Kernels + "Kernels/FP32/ReLU", + "Kernels/FP32/Softmax/Regular", + "Kernels/FP32/Add/Regular", + "Kernels/FP32/Conv/DW_2D_Bias", + "Kernels/FP32/Conv/DW_2D_NoBias", + "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", + "Kernels/FP32/Conv/Regular_2D_Bias", + "Kernels/FP32/Conv/Regular_2D_NoBias", + "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", + "Kernels/FP32/Div", + "Kernels/FP32/GEMM/Regular", + "Kernels/FP32/MatMul", + "Kernels/FP32/MaxPool", + "Kernels/FP32/Mul", + "Kernels/FP32/LayerNorm", + "Kernels/FP32/RMSNorm", + "Kernels/FP32/Pow/Scalar", + "Kernels/FP32/Pow/Vector", + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean", + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add", + "Kernels/FP32/ReduceMean/KeepDims/AllAxes", + "Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3", + "Kernels/FP32/ReduceMean/KeepDims/Axes1_3", + "Kernels/FP32/ReduceMean/KeepDims/Axes2_1", + 
"Kernels/FP32/ReduceMean/KeepDims/Axis0", + "Kernels/FP32/ReduceMean/KeepDims/Axis2", + "Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add", + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean", + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add", + "Kernels/FP32/ReduceMean/NoKeepDims/AllAxes", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1", + "Kernels/FP32/ReduceMean/NoKeepDims/Axis0", + "Kernels/FP32/ReduceMean/NoKeepDims/Axis2", + "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add", + "Kernels/FP32/Reshape/SkipConnection", + "Kernels/FP32/Sqrt", + "Kernels/FP32/Transpose", + # Integer Kernels + "Kernels/Integer/Softmax/Regular", + "Kernels/Integer/Add/MultIO", + "Kernels/Integer/Add/Regular", + "Kernels/Integer/Conv/DW_1D", + "Kernels/Integer/Conv/Regular_1D", + "Kernels/Integer/Conv/DW_2D", + "Kernels/Integer/Conv/Regular_2D", + "Kernels/Integer/GEMM/Regular", + "Kernels/Integer/MatMul/Add", + "Kernels/Integer/MatMul/Regular", + "Kernels/Integer/MaxPool", + "Kernels/Integer/Pad/Regular_1D", + "Kernels/Integer/Pad/Regular_2D", + "Kernels/Integer/ReduceMean", + "Kernels/Integer/ReduceSum", + "Kernels/Integer/Slice", + # Special test from TinyViT model layers + "Models/TinyViT/5M/Layers/FP32/ReduceMean", + # Mixed Precision / Quantization + "Kernels/Mixed/Dequant", + "Kernels/Mixed/Quant", + "Models/Transformer_DeepQuant", + "Kernels/Integer/Conv/DW_2D_RQ", + "Kernels/Integer/Conv/Regular_2D_RQ", + "Kernels/Integer/MatMul/Regular_RQ", ] +# Model tests - paths from generic-models job in workflow MODEL_TESTS = [ - "simpleRegression", - "WaveFormer", - "simpleCNN", - "ICCT", - "ICCT_ITA", - "ICCT_8", - "ICCT_ITA_8", - "miniMobileNet", - "miniMobileNetv2", - "CCT/CCT_1_16_16_8", - "CCT/CCT_2_32_32_128_Opset20", - "testFloatDemoTinyViT", - "Autoencoder1D", + "Models/Autoencoder1D", + "Models/CCT/FP32/CCT_1_16_16_8", + "Models/CCT/FP32/CCT_2_32_32_128_Opset20", + "Models/CCT/Int/ICCT", + "Models/CCT/Int/ICCT_8", + "Models/CCT/Int/ICCT_ITA", + "Models/CCT/Int/ICCT_ITA_8", + "Models/miniMobileNet", + "Models/miniMobileNetv2", + "Models/CNN_Linear1", + "Models/TinyViT/Demo", + "Models/WaveFormer", + "Models/CNN_Linear2", ] diff --git a/DeeployTest/test_mempool_config.py b/DeeployTest/test_mempool_config.py index 64660f3ab3..fa0cfd7715 100644 --- a/DeeployTest/test_mempool_config.py +++ b/DeeployTest/test_mempool_config.py @@ -12,35 +12,30 @@ # Kernel tests (individual operators) KERNEL_TESTS = [ - "Adder", - "MultIO", - "test1DConvolution", - "test2DConvolution", - "test1DDWConvolution", - "test2DDWConvolution", - "test1DPad", - "test2DPad", - "testGEMM", - "testMatMul", - "testMatMulAdd", - "testMaxPool", - "testRQConv", - "testRQGEMM", - "testRQMatMul", - "testReduceSum", - "testReduceMean", - "testSlice", - "testRequantizedDWConv", - "test2DRequantizedConv", + "Kernels/Integer/Add/MultIO", + "Kernels/Integer/Add/Regular", + "Kernels/Integer/Conv/DW_1D", + "Kernels/Integer/Conv/Regular_1D", + "Kernels/Integer/Conv/DW_2D", + "Kernels/Integer/Conv/Regular_2D", + "Kernels/Integer/GEMM/Regular", + "Kernels/Integer/MatMul/Add", + "Kernels/Integer/MatMul/Regular", + "Kernels/Integer/MaxPool", + "Kernels/Integer/Pad/Regular_1D", + "Kernels/Integer/Pad/Regular_2D", + "Kernels/Integer/ReduceMean", + "Kernels/Integer/ReduceSum", + "Kernels/Integer/Slice", + "Kernels/Integer/Conv/Regular_2D_RQ", + "Kernels/Integer/Conv/DW_2D_RQ", + "Kernels/Integer/GEMM/Regular_RQPerRow", + 
"Kernels/Integer/MatMul/Regular_RQ", ] # Model tests (full networks) MODEL_TESTS = [ - "simpleRegression", - "simpleCNN", - "ICCT", - "ICCT_ITA", - "ICCT_8", - "miniMobileNet", - "miniMobileNetv2", + "Models/CCT/Int/ICCT", + "Models/CCT/Int/ICCT_8", + "Models/CCT/Int/ICCT_ITA", ] diff --git a/DeeployTest/test_siracusa_config.py b/DeeployTest/test_siracusa_config.py index b1f86af97e..0a77d714e8 100644 --- a/DeeployTest/test_siracusa_config.py +++ b/DeeployTest/test_siracusa_config.py @@ -7,55 +7,74 @@ DEFAULT_CORES = 8 KERNEL_TESTS = [ - "Adder", - "MultIO", - "test1DPad", - "test2DPad", - "testMatMul", - "testMatMulAdd", - "testRequantizedDWConv", - "test2DRequantizedConv", - "iSoftmax", - "testConcat", - "testRMSNorm", - "trueIntegerDivSandwich", - "Hardswish", - "RQHardswish", - "testBacktracking", - "testFloatAdder", - "testFloatGEMM", - "testFloat2DConvolution", - "testFloat2DConvolutionBias", - "testFloat2DConvolutionZeroBias", - "testFloat2DDWConvolution", - "testFloat2DDWConvolutionBias", - "testFloat2DDWConvolutionZeroBias", - "testFloatLayerNorm", - "testFloatRelu", - "testFloatMaxPool", - "testFloatMatmul", - "testFloatSoftmax", - "testFloatTranspose", - "testFloatMul", - "Quant", - "Dequant", - "testFloatReduceSum", - "testFloatReshapeWithSkipConnection", - "testFloatSoftmaxGrad", - "testFloatSoftmaxCrossEntropy", - "testFloatSoftmaxCrossEntropyGrad", - "QuantizedLinear", + "Kernels/FP32/ReLU", + "Kernels/FP32/Softmax/CrossEntropy", + "Kernels/FP32/Softmax/CrossEntropyGrad", + "Kernels/FP32/Softmax/Grad", + "Kernels/FP32/Softmax/Regular", + "Kernels/FP32/Add/Regular", + "Kernels/FP32/Conv/DW_2D_Bias", + "Kernels/FP32/Conv/DW_2D_NoBias", + "Kernels/FP32/Conv/DW_2D_ZeroValuedBias", + "Kernels/FP32/Conv/Regular_2D_Bias", + "Kernels/FP32/Conv/Regular_2D_NoBias", + "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias", + "Kernels/FP32/GEMM/Regular", + "Kernels/FP32/MatMul", + "Kernels/FP32/MaxPool", + "Kernels/FP32/Mul", + "Kernels/FP32/LayerNorm", + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean", + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add", + "Kernels/FP32/ReduceMean/KeepDims/AllAxes", + "Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3", + "Kernels/FP32/ReduceMean/KeepDims/Axes1_3", + "Kernels/FP32/ReduceMean/KeepDims/Axes2_1", + "Kernels/FP32/ReduceMean/KeepDims/Axis0", + "Kernels/FP32/ReduceMean/KeepDims/Axis2", + "Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add", + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean", + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add", + "Kernels/FP32/ReduceMean/NoKeepDims/AllAxes", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3", + "Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1", + "Kernels/FP32/ReduceMean/NoKeepDims/Axis0", + "Kernels/FP32/ReduceMean/NoKeepDims/Axis2", + "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add", + "Kernels/FP32/ReduceSum", + "Kernels/FP32/Reshape/SkipConnection", + "Kernels/FP32/Transpose", + "Kernels/Integer/Hardswish/Regular", + "Kernels/Integer/Softmax/Regular", + "Kernels/Integer/Add/MultIO", + "Kernels/Integer/Add/Regular", + "Kernels/Integer/Concat", + "Kernels/Integer/MatMul/Add", + "Kernels/Integer/MatMul/Regular", + "Kernels/Integer/Pad/Regular_1D", + "Kernels/Integer/Pad/Regular_2D", + "Kernels/Integer/RMSNorm", + "Models/TinyViT/5M/Layers/FP32/ReduceMean", + "Others/Backtracking", + "Kernels/Mixed/Dequant", + "Kernels/Mixed/Quant", + "Models/Transformer_DeepQuant", + "Kernels/Integer/Conv/Regular_2D_RQ", + "Kernels/Integer/Conv/DW_2D_RQ", + 
"Kernels/Integer/Hardswish/Regular_RQ", + "Kernels/Integer/TrueIntegerDiv", ] MODEL_TESTS = [ - "simpleRegression", - "miniMobileNet", - "miniMobileNetv2", - "Attention", - "MLPerf/KeywordSpotting", - "MLPerf/ImageClassification", - "MLPerf/AnomalyDetection", - "CCT/CCT_1_16_16_8", - "CCT/CCT_2_32_32_128_Opset20", - "testFloatDemoTinyViT", + "Kernels/Integer/Attention", + "Models/CCT/FP32/CCT_1_16_16_8", + "Models/CCT/FP32/CCT_2_32_32_128_Opset20", + "Models/miniMobileNet", + "Models/miniMobileNetv2", + "Models/MLPerf/KeywordSpotting", + "Models/MLPerf/ImageClassification", + "Models/MLPerf/AnomalyDetection", + "Models/TinyViT/Demo", + "Models/CNN_Linear2", ] diff --git a/DeeployTest/test_siracusa_neureka_tiled_config.py b/DeeployTest/test_siracusa_neureka_tiled_config.py index 66fe52cfe3..68bd3dd96e 100644 --- a/DeeployTest/test_siracusa_neureka_tiled_config.py +++ b/DeeployTest/test_siracusa_neureka_tiled_config.py @@ -11,47 +11,47 @@ # L2 single-buffer kernel tests # Format: dict of {test_name: [L1_sizes]} L2_SINGLEBUFFER_KERNELS = { - "testRequantizedLinear": [16000], - "testPointwise": [32000], - "testPointwiseConvBNReLU": [32000], - "testPointwiseUnsignedWeights": [32000], + "Kernels/Integer/GEMM/Regular_RQPerColumn": [16000], + "Kernels/Integer/Conv/PW_2D": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ": [32000], } # L2 double-buffer kernel tests L2_DOUBLEBUFFER_KERNELS = { - "testRequantizedLinear": [16000], - "testPointwise": [32000], - "testPointwiseConvBNReLU": [32000], - "testPointwiseUnsignedWeights": [32000], + "Kernels/Integer/GEMM/Regular_RQPerColumn": [16000], + "Kernels/Integer/Conv/PW_2D": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ": [32000], } # L3 single-buffer model tests # Format: dict of {test_name: [L1_sizes]} L3_SINGLEBUFFER_MODELS = { - "miniMobileNet": [2000], - "Attention": [2500], - "Transformer": [15000], - "microLlama/microLlama1": [10000], + "Models/miniMobileNet": [2000], + "Kernels/Integer/Attention": [2500], + "Models/Transformer": [15000], + "Models/microLlama/microLlama1": [10000], } # L3 double-buffer model tests L3_DOUBLEBUFFER_MODELS = { - "miniMobileNet": [2000], - "Attention": [5000], - "Transformer": [30000], + "Models/miniMobileNet": [2000], + "Kernels/Integer/Attention": [5000], + "Models/Transformer": [30000], } # L2 single-buffer kernel tests with weight memory (neureka-wmem) L2_SINGLEBUFFER_KERNELS_WMEM = { - "testRequantizedLinear": [16000], - "testPointwise": [32000], - "testPointwiseConvBNReLU": [32000], - "testPointwiseUnsignedWeights": [32000], + "Kernels/Integer/GEMM/Regular_RQPerColumn": [16000], + "Kernels/Integer/Conv/PW_2D": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Regular_RQ": [32000], + "Kernels/Integer/Conv/PW_2D_RQ/Unsigned_RQ": [32000], } # L3 double-buffer model tests with weight memory (neureka-wmem) L3_DOUBLEBUFFER_MODELS_WMEM = { - "miniMobileNet": [2000], - "Attention": [3500], - "microLlama/microLlama1": [10000], + "Models/miniMobileNet": [2000], + "Kernels/Integer/Attention": [3500], + "Models/microLlama/microLlama1": [10000], } diff --git a/DeeployTest/test_siracusa_tiled_config.py b/DeeployTest/test_siracusa_tiled_config.py index 00c0d28dfb..1c0bb0315c 100644 --- a/DeeployTest/test_siracusa_tiled_config.py +++ b/DeeployTest/test_siracusa_tiled_config.py @@ -10,113 +10,149 @@ DEFAULT_SEARCH_STRATEGY = "random-max" L2_SINGLEBUFFER_KERNELS = { - "testMatMul": [64000, 32000, 16000], - 
"test2DRequantizedConv": [8000, 6000, 4000], - "test2DRequantizedStriddedPaddedConv": [600], - "testRequantizedDWConv": [2561], - "iSoftmax": [800, 500, 300], - "testConcat": [32000, 16000, 8000], - "testRMSNorm": [2048, 1024, 512], - "Hardswish": [750], - "RQHardswish": [750], - "testFloatGEMM": [8000], - "testFloat2DConvolution": [1600], - "testFloat2DConvolutionBias": [6600], - "testFloat2DConvolutionZeroBias": [6600], - "testFloat2DDWConvolution": [7200], - "testFloat2DDWConvolutionBias": [7200], - "testFloat2DDWConvolutionZeroBias": [7200], - "testFloatLayerNorm": [2000], - "testFloatMaxPool": [2000], - "testFloatMatmul": [2000], - "testFloatRelu": [2000], - "testFloatReshapeWithSkipConnection": [1400], - "testFloatSoftmax": [4000], - "testFloatTranspose": [2000], - "testFloatMul": [2000], - "largeFloatAdd": [220000], - "testRQGEMMwBatch": [20000], - "testMatMulBatch": [20000], + "Kernels/FP32/ReLU": [2000], + "Kernels/FP32/Softmax/Regular": [4000], + "Kernels/FP32/Add/Large": [220000], + "Kernels/FP32/Conv/DW_2D_Bias": [7200], + "Kernels/FP32/Conv/DW_2D_NoBias": [7200], + "Kernels/FP32/Conv/DW_2D_ZeroValuedBias": [7200], + "Kernels/FP32/Conv/Regular_2D_Bias": [6600], + "Kernels/FP32/Conv/Regular_2D_NoBias": [1600], + "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias": [6600], + "Kernels/FP32/GEMM/Regular": [8000], + "Kernels/FP32/MatMul": [2000], + "Kernels/FP32/MaxPool": [2000], + "Kernels/FP32/Mul": [2000], + "Kernels/FP32/LayerNorm": [2000], + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean": [8000], + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/KeepDims/AllAxes": [50000], + "Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3": [50000], + "Kernels/FP32/ReduceMean/KeepDims/Axes1_3": [5000, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axes2_1": [6200, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axis0": [8400, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axis2": [8400, 50000], + "Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/AllAxes": [50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3": [50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3": [5000, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1": [6200, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axis0": [8400, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axis2": [8400, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add": [8000], + "Kernels/FP32/Reshape/SkipConnection": [1400], + "Kernels/FP32/Transpose": [2000], + "Kernels/Integer/Hardswish/Regular": [750], + "Kernels/Integer/Softmax/Regular": [800, 500, 300], + "Kernels/Integer/Concat": [32000, 16000, 8000], + "Kernels/Integer/MatMul/Batch": [20000], + "Kernels/Integer/MatMul/Regular": [64000, 32000, 16000], + "Kernels/Integer/RMSNorm": [2048, 1024, 512], + "Kernels/Integer/Conv/Regular_2D_RQ": [8000, 6000, 4000], + "Kernels/Integer/Conv/DW_2D_RQ": [2561], + "Kernels/Integer/Conv/StriddedPadded_2D_RQ": [600], + "Kernels/Integer/GEMM/Batch_RQ": [20000], + "Kernels/Integer/Hardswish/Regular_RQ": [750], } L2_DOUBLEBUFFER_KERNELS = { - "testMatMul": [64000, 32000, 16000], - "test2DRequantizedConv": [8000, 6000, 5000], - "testRequantizedDWConv": [5121], - "iSoftmax": [1600, 1000, 600], - "testConcat": [64000, 32000, 16000], - "testRMSNorm": [4096, 2048, 1024], - "Hardswish": [750], - "RQHardswish": [800], - "testFloatGEMM": [8000], - 
"testFloat2DConvolution": [2000], - "testFloat2DConvolutionBias": [8800], - "testFloat2DConvolutionZeroBias": [8800], - "testFloat2DDWConvolution": [9800], - "testFloat2DDWConvolutionBias": [10000], - "testFloat2DDWConvolutionZeroBias": [9800], - "testFloatLayerNorm": [2000], - "testFloatMaxPool": [5000], - "testFloatMatmul": [5000], - "testFloatRelu": [20], - "testFloatReshapeWithSkipConnection": [2600], - "testFloatSoftmax": [8000], - "testFloatTranspose": [2000], - "testFloatMul": [2000], + "Kernels/FP32/ReLU": [20], + "Kernels/FP32/Softmax/Regular": [8000], + "Kernels/FP32/Conv/DW_2D_Bias": [10000], + "Kernels/FP32/Conv/DW_2D_NoBias": [9800], + "Kernels/FP32/Conv/DW_2D_ZeroValuedBias": [9800], + "Kernels/FP32/Conv/Regular_2D_Bias": [8800], + "Kernels/FP32/Conv/Regular_2D_NoBias": [2000], + "Kernels/FP32/Conv/Regular_2D_ZeroValuedBias": [8800], + "Kernels/FP32/GEMM/Regular": [8000], + "Kernels/FP32/MatMul": [5000], + "Kernels/FP32/MaxPool": [5000], + "Kernels/FP32/Mul": [2000], + "Kernels/FP32/LayerNorm": [2000], + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean": [8000], + "Kernels/FP32/ReduceMean/KeepDims/Add_ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/KeepDims/AllAxes": [100000], + "Kernels/FP32/ReduceMean/KeepDims/Axes1_2_3": [100000], + "Kernels/FP32/ReduceMean/KeepDims/Axes1_3": [10000, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axes2_1": [13000, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axis0": [17000, 50000], + "Kernels/FP32/ReduceMean/KeepDims/Axis2": [17000, 50000], + "Kernels/FP32/ReduceMean/KeepDims/ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/Add_ReduceMean_Add": [8000], + "Kernels/FP32/ReduceMean/NoKeepDims/AllAxes": [100000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_2_3": [100000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes1_3": [10000, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axes2_1": [13000, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axis0": [17000, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/Axis2": [17000, 50000], + "Kernels/FP32/ReduceMean/NoKeepDims/ReduceMean_Add": [8000], + "Kernels/FP32/Reshape/SkipConnection": [2600], + "Kernels/FP32/Transpose": [2000], + "Kernels/Integer/Hardswish/Regular": [750], + "Kernels/Integer/Softmax/Regular": [1600, 1000, 600], + "Kernels/Integer/Concat": [64000, 32000, 16000], + "Kernels/Integer/MatMul/Regular": [64000, 32000, 16000], + "Kernels/Integer/RMSNorm": [4096, 2048, 1024], + "Kernels/Integer/Conv/Regular_2D_RQ": [8000, 6000, 5000], + "Kernels/Integer/Conv/DW_2D_RQ": [5121], + "Kernels/Integer/Hardswish/Regular_RQ": [800], } L2_SINGLEBUFFER_MODELS = { - "simpleRegression": [45000, 30000, 15000], - "miniMobileNet": [60000, 12000, 6000, 3000], - "miniMobileNetv2": [60000, 16000, 12000, 8000], - "Attention": [60000, 10000, 5000], - "microLlama/microLlama1": [60000, 10000, 5000], - "microLlama/microLlama8": [60000, 10000, 5000], - "microLlama/microLlama8_parallel": [60000, 10000, 5000], - "MLPerf/KeywordSpotting": [64000], - "MLPerf/ImageClassification": [64000], - "MLPerf/AnomalyDetection": [64000], - "CCT/CCT_1_16_16_8": [64000], - "testFloatDemoTinyViT": [4000], + "Models/CNN_Linear2": [45000, 30000, 15000], + "Models/miniMobileNet": [60000, 12000, 6000, 3000], + "Models/miniMobileNetv2": [60000, 16000, 12000, 8000], + "Kernels/Integer/Attention": [60000, 10000, 5000], + "Models/microLlama/microLlama1": [60000, 10000, 5000], + "Models/microLlama/microLlama8": [60000, 10000, 5000], + 
"Models/microLlama/microLlama8_parallel": [60000, 10000, 5000], + "Models/MLPerf/KeywordSpotting": [64000], + "Models/MLPerf/ImageClassification": [64000], + "Models/MLPerf/AnomalyDetection": [64000], + "Models/CCT/FP32/CCT_1_16_16_8": [64000], + "Models/TinyViT/Demo": [4000], } L2_DOUBLEBUFFER_MODELS = { - "simpleRegression": [60000, 45000, 30000], - "miniMobileNet": [60000, 24000, 12000, 6000], - "miniMobileNetv2": [60000, 32000, 24000, 16000], - "Attention": [60000, 20000, 10000, 5000], - "microLlama/microLlama1": [60000, 20000, 10000], - "microLlama/microLlama8": [60000, 20000, 10000], - "microLlama/microLlama8_parallel": [60000, 20000, 10000], - "MLPerf/KeywordSpotting": [128000], - "MLPerf/ImageClassification": [128000], - "MLPerf/AnomalyDetection": [128000], - "CCT/CCT_1_16_16_8": [128000], - "testFloatDemoTinyViT": [8000], + "Models/CNN_Linear2": [60000, 45000, 30000], + "Models/miniMobileNet": [60000, 24000, 12000, 6000], + "Models/miniMobileNetv2": [60000, 32000, 24000, 16000], + "Kernels/Integer/Attention": [60000, 20000, 10000, 5000], + "Models/microLlama/microLlama1": [60000, 20000, 10000], + "Models/microLlama/microLlama8": [60000, 20000, 10000], + "Models/microLlama/microLlama8_parallel": [60000, 20000, 10000], + "Models/MLPerf/KeywordSpotting": [128000], + "Models/MLPerf/ImageClassification": [128000], + "Models/MLPerf/AnomalyDetection": [128000], + "Models/CCT/FP32/CCT_1_16_16_8": [128000], + "Models/TinyViT/Demo": [8000], } L3_SINGLEBUFFER_MODELS = { - "simpleRegression": [45000, 30000, 16000], - "miniMobileNet": [60000, 12000, 6000], - "miniMobileNetv2": [60000, 16000, 12000, 8000], - "Attention": [60000, 10000, 5000, 2500], - "Transformer": [60000, 30000, 15000], - "microLlama/microLlama1": [60000, 10000, 5000], - "CCT/CCT_2_32_32_128": [128000], - "testTrainCCT/CCT2_FT2": [128000], - "testFloatDemoTinyViT": [4000], + "Models/CNN_Linear2": [45000, 30000, 16000], + "Models/miniMobileNet": [60000, 12000, 6000], + "Models/miniMobileNetv2": [60000, 16000, 12000, 8000], + "Kernels/Integer/Attention": [60000, 10000, 5000, 2500], + "Models/Transformer": [60000, 30000, 15000], + "Models/microLlama/microLlama1": [60000, 10000, 5000], + "Models/CCT/FP32/CCT_2_32_32_128": [128000], + "Models/CCT_Train/CCT2_FT2": [128000], + "Models/TinyViT/Demo": [4000], } L3_DOUBLEBUFFER_MODELS = { - "simpleRegression": [60000, 45000, 30000], - "miniMobileNet": [60000, 24000, 12000, 6000], - "miniMobileNetv2": [60000, 32000, 24000, 16000], - "Attention": [60000, 20000, 10000, 5000], - "Transformer": [60000, 30000, 15000], - "microLlama/microLlama1": [60000, 20000, 10000], - "microLlama/microLlama8": [60000, 20000, 10000], - "microLlama/microLlama8_parallel": [60000, 20000, 10000], - "CCT/CCT_2_32_32_128": [128000], - "testTrainCCT/CCT2_FT2": [128000], - "testFloatDemoTinyViT": [4000], + "Models/CNN_Linear2": [60000, 45000, 30000], + "Models/miniMobileNet": [60000, 24000, 12000, 6000], + "Models/miniMobileNetv2": [60000, 32000, 24000, 16000], + "Kernels/Integer/Attention": [60000, 20000, 10000, 5000], + "Models/Transformer": [60000, 30000, 15000], + "Models/microLlama/microLlama1": [60000, 20000, 10000], + "Models/microLlama/microLlama8": [60000, 20000, 10000], + "Models/microLlama/microLlama8_parallel": [60000, 20000, 10000], + "Models/CCT/FP32/CCT_2_32_32_128": [128000], + "Models/CCT_Train/CCT2_FT2": [128000], + "Models/TinyViT/Demo": [4000], } diff --git a/DeeployTest/test_snitch_config.py b/DeeployTest/test_snitch_config.py index b3b1c7b776..f51b2ede23 100644 --- 
a/DeeployTest/test_snitch_config.py +++ b/DeeployTest/test_snitch_config.py @@ -9,16 +9,16 @@ DEFAULT_NUM_CORES = 9 KERNEL_TESTS = [ - "Adder", - "iSoftmax", - "TestiNoNorm", - "TestAdderLarge", - "TestiSoftmaxLarge", - "testMatMul", - "testRQGEMM", - "TestRQAdd", - "testRQGEMMTransB", - "testFloatSoftmax", + "Kernels/FP32/Softmax/Regular", + "Kernels/Integer/Add/Large", + "Kernels/Integer/Add/Regular", + "Kernels/Integer/Softmax/Large", + "Kernels/Integer/Softmax/Regular", + "Kernels/Integer/MatMul/Regular", + "Kernels/Integer/iNoNorm", + "Kernels/Integer/GEMM/Regular_RQPerRow", + "Kernels/Integer/Add/Regular_RQ", + "Kernels/Integer/GEMM/TransB_RQ", ] MODEL_TESTS = [] diff --git a/DeeployTest/test_snitch_tiled_config.py b/DeeployTest/test_snitch_tiled_config.py index 4e3662da05..3f81239fce 100644 --- a/DeeployTest/test_snitch_tiled_config.py +++ b/DeeployTest/test_snitch_tiled_config.py @@ -11,14 +11,14 @@ # L2 single-buffer tests with different L1 sizes # Format: {test_name: [L1_sizes]} L2_SINGLEBUFFER_KERNELS = { - "TestiNoNorm": [5000, 10000], - "TestAdderLarge": [5000, 10000], - "TestiSoftmaxLarge": [5000, 10000], - "testRQGEMM": [2000, 5000], - "testFloatSoftmax": [2000, 5000, 10000], - "TestRQAdd": [5000, 10000], - "testFloatGEMM": [2000, 5000, 10000], - "testFloatGEMMtransB": [2000, 5000, 10000], + "Kernels/Integer/Add/Large": [5000, 10000], + "Kernels/Integer/Softmax/Large": [5000, 10000], + "Kernels/FP32/Softmax/Regular": [2000, 5000, 10000], + "Kernels/FP32/GEMM/Regular": [2000, 5000, 10000], + "Kernels/FP32/GEMM/TransB": [2000, 5000, 10000], + "Kernels/Integer/iNoNorm": [5000, 10000], + "Kernels/Integer/Add/Regular_RQ": [5000, 10000], + "Kernels/Integer/GEMM/Regular_RQPerRow": [2000, 5000], } L2_SINGLEBUFFER_MODELS = {} diff --git a/DeeployTest/test_softhier_config.py b/DeeployTest/test_softhier_config.py index f5e03eddac..18d2f61c7d 100644 --- a/DeeployTest/test_softhier_config.py +++ b/DeeployTest/test_softhier_config.py @@ -9,7 +9,7 @@ DEFAULT_NUM_CLUSTERS = 1 KERNEL_TESTS = [ - "Adder", + "Kernels/Integer/Add/Regular", ] MODEL_TESTS = [] From 6023ee0633d5f1e7fbe4bd6b7afe6fb5490e0bac Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 9 Jan 2026 15:09:17 +0100 Subject: [PATCH 39/51] Fix test path --- .github/workflows/ci-deeploy.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-deeploy.yml b/.github/workflows/ci-deeploy.yml index a0cbdba7b2..a45bf84df5 100644 --- a/.github/workflows/ci-deeploy.yml +++ b/.github/workflows/ci-deeploy.yml @@ -248,19 +248,19 @@ jobs: include: - name: fail-input0 platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int8_t B=int8_t C=int8_t" offset_map: "A=0 B=0 C=0" shouldFail: true - name: fail-input2 platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int16_t B=int8_t C=int16_t" offset_map: "A=0 B=0 C=0" shouldFail: true - name: pass platform: Generic - test: testTypeInferenceDifferentTypes + test: Others/TypeInference type_map: "A=int16_t B=int8_t C=int32_t" offset_map: "A=0 B=0 C=0" shouldFail: false From c62bf60f744d0ca96f5a2acaefebc3ffaaed862a Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 9 Jan 2026 15:15:36 +0100 Subject: [PATCH 40/51] Format and lint --- .github/workflows/ci-platform-cortexm.yml | 1 - .github/workflows/ci-platform-generic.yml | 1 - .github/workflows/ci-platform-snitch.yml | 1 - .github/workflows/ci-platform-softhier.yml | 1 - .github/workflows/infra-generate-ccache.yml | 1 
- 5 files changed, 5 deletions(-) diff --git a/.github/workflows/ci-platform-cortexm.yml b/.github/workflows/ci-platform-cortexm.yml index 92cc04af9f..0e03e17d0b 100644 --- a/.github/workflows/ci-platform-cortexm.yml +++ b/.github/workflows/ci-platform-cortexm.yml @@ -44,4 +44,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models" - diff --git a/.github/workflows/ci-platform-generic.yml b/.github/workflows/ci-platform-generic.yml index 34944fa39a..83c191180f 100644 --- a/.github/workflows/ci-platform-generic.yml +++ b/.github/workflows/ci-platform-generic.yml @@ -44,4 +44,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "models" - \ No newline at end of file diff --git a/.github/workflows/ci-platform-snitch.yml b/.github/workflows/ci-platform-snitch.yml index 5b1a1d5727..c1ae694148 100644 --- a/.github/workflows/ci-platform-snitch.yml +++ b/.github/workflows/ci-platform-snitch.yml @@ -36,4 +36,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - diff --git a/.github/workflows/ci-platform-softhier.yml b/.github/workflows/ci-platform-softhier.yml index e1c534ec4c..28a85160be 100644 --- a/.github/workflows/ci-platform-softhier.yml +++ b/.github/workflows/ci-platform-softhier.yml @@ -36,4 +36,3 @@ jobs: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} pytest-marker: "kernels" - diff --git a/.github/workflows/infra-generate-ccache.yml b/.github/workflows/infra-generate-ccache.yml index ae2d370bc0..e4d00ea911 100644 --- a/.github/workflows/infra-generate-ccache.yml +++ b/.github/workflows/infra-generate-ccache.yml @@ -50,4 +50,3 @@ jobs: with: path: /app/.ccache key: ccache-ci - \ No newline at end of file From eae4932ee0fd25dc5959d5658d522f0f24fb0916 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 9 Jan 2026 15:45:15 +0100 Subject: [PATCH 41/51] Move internal deeploy test to pytest --- .github/workflows/ci-deeploy.yml | 229 +---------------- DeeployTest/conftest.py | 3 + DeeployTest/test_deeploy_internal.py | 372 +++++++++++++++++++++++++++ DeeployTest/test_dmas.py | 6 +- 4 files changed, 381 insertions(+), 229 deletions(-) create mode 100644 DeeployTest/test_deeploy_internal.py diff --git a/.github/workflows/ci-deeploy.yml b/.github/workflows/ci-deeploy.yml index a45bf84df5..fc468306b1 100644 --- a/.github/workflows/ci-deeploy.yml +++ b/.github/workflows/ci-deeploy.yml @@ -43,7 +43,7 @@ jobs: shell: bash run: pip install -e . - deeploy-memory-allocation: + deeploy-internal-tests: needs: select-env runs-on: ${{ needs.select-env.outputs.runner }} container: @@ -56,231 +56,8 @@ jobs: - name: Build Deeploy shell: bash run: pip install -e . 
- - name: Run Test + - name: Run Internal Tests shell: bash run: | cd DeeployTest - python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=MiniMalloc - python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=60000 --memAllocStrategy=MiniMalloc --shouldFail - python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=90000 --memAllocStrategy=TetrisRandom - python testMVP.py -t Tests/Models/CCT/FP32/CCT_1_16_16_8 -p Siracusa --defaultMemLevel=L2 --l1=64000 --l2=75000 --memAllocStrategy=TetrisRandom --shouldFail - - deeploy-state-serialization: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python deeployStateEqualityTest.py -t ./Tests/Models/CNN_Linear2 -p QEMU-ARM - python deeployStateEqualityTest.py -t ./Tests/Models/CNN_Linear2 -p Siracusa - python deeployStateEqualityTest.py -t ./Tests/Models/CNN_Linear2 -p MemPool - python deeployStateEqualityTest.py -t ./Tests/Models/CNN_Linear2 -p Generic - - deeploy-memory-level-extension: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python testMemoryLevelExtension.py -t ./Tests/Models/CNN_Linear2 -p QEMU-ARM - python testMemoryLevelExtension.py -t ./Tests/Models/CNN_Linear2 -p Siracusa - python testMemoryLevelExtension.py -t ./Tests/Models/CNN_Linear2 -p MemPool - python testMemoryLevelExtension.py -t ./Tests/Models/CNN_Linear2 -p Generic - - deeploy-tiler-extension: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear2 - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear1 - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MatMul/Regular - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MaxPool - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear2 --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear1 --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MatMul/Regular --l1 2000 --shouldFail - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MaxPool --l1 2000 --shouldFail - - deeploy-memory-allocation-extension: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . 
- - name: Run Test - shell: bash - run: | - cd DeeployTest - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear2 - python testTilerExtension.py -p Siracusa -t ./Tests/Models/CNN_Linear1 - python testTilerExtension.py -p Siracusa -t ./Tests/Models/miniMobileNet - python testTilerExtension.py -p Siracusa -t ./Tests/Models/miniMobileNetv2 - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MatMul/Regular - python testTilerExtension.py -p Siracusa -t ./Tests/Kernels/Integer/MaxPool - - deeploy-typing: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python testTypes.py - - deeploy-debug: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python testPrintInputOutputTransformation.py -p Generic -t ./Tests/Models/CNN_Linear2 - python testPrintInputOutputTransformation.py -p Siracusa -t ./Tests/Models/CNN_Linear2 - python testDebugPrintPass.py -p Generic -t ./Tests/Models/CNN_Linear2 - - deeploy-regex-matching: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - python testRegexMatching.py - - deeploy-test-dmas: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . - - name: Run Test - shell: bash - run: | - cd DeeployTest - pytest test_dmas.py -v -n 4 - - generate-network-type-inference: - needs: select-env - runs-on: ${{ needs.select-env.outputs.runner }} - container: - image: ${{ needs.select-env.outputs.image }} - strategy: - fail-fast: false - matrix: - include: - - name: fail-input0 - platform: Generic - test: Others/TypeInference - type_map: "A=int8_t B=int8_t C=int8_t" - offset_map: "A=0 B=0 C=0" - shouldFail: true - - name: fail-input2 - platform: Generic - test: Others/TypeInference - type_map: "A=int16_t B=int8_t C=int16_t" - offset_map: "A=0 B=0 C=0" - shouldFail: true - - name: pass - platform: Generic - test: Others/TypeInference - type_map: "A=int16_t B=int8_t C=int32_t" - offset_map: "A=0 B=0 C=0" - shouldFail: false - name: Test Type Inference (${{ matrix.name }}) - steps: - - name: Checkout Repo - uses: actions/checkout@v4 - with: - submodules: recursive - - name: Build Deeploy - shell: bash - run: pip install -e . 
- - name: Run Test - shell: bash - run: | - cd DeeployTest - python generateNetwork.py \ - -p ${{ matrix.platform }} \ - -t ./Tests/${{ matrix.test }} \ - -v \ - --input-type-map ${{ matrix.type_map }} \ - --input-offset-map ${{ matrix.offset_map }} \ - ${{ matrix.shouldFail && '--shouldFail' || '' }} + pytest -v -m deeploy_internal -n 4 diff --git a/DeeployTest/conftest.py b/DeeployTest/conftest.py index 981fc6a7a9..8c4847a296 100644 --- a/DeeployTest/conftest.py +++ b/DeeployTest/conftest.py @@ -68,6 +68,9 @@ def pytest_configure(config: pytest.Config) -> None: config.addinivalue_line("markers", "l3: mark test as L3 default memory level") config.addinivalue_line("markers", "wmem: mark test as using Neureka weight memory") config.addinivalue_line("markers", "dma: mark test as DMA test") + config.addinivalue_line( + "markers", + "deeploy_internal: mark test as internal Deeploy test (state serialization, extensions, transformations)") # Configure logging based on verbosity verbosity = config.option.verbose diff --git a/DeeployTest/test_deeploy_internal.py b/DeeployTest/test_deeploy_internal.py new file mode 100644 index 0000000000..14f7c3fc15 --- /dev/null +++ b/DeeployTest/test_deeploy_internal.py @@ -0,0 +1,372 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import subprocess +from pathlib import Path + +import pytest + +# Mark all tests in this module as deeploy_internal +pytestmark = pytest.mark.deeploy_internal + + +@pytest.mark.parametrize("platform", ["QEMU-ARM", "Siracusa", "MemPool", "Generic"]) +def test_deeploy_state_serialization(platform): + """Test that Deeploy state can be serialized and deserialized correctly.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "deeployStateEqualityTest.py"), + "-t", + "./Tests/Models/CNN_Linear2", + "-p", + platform, + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"State serialization test failed for platform {platform}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +@pytest.mark.parametrize("platform", ["QEMU-ARM", "Siracusa", "MemPool", "Generic"]) +def test_memory_level_extension(platform): + """Test memory level extension functionality.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testMemoryLevelExtension.py"), + "-t", + "./Tests/Models/CNN_Linear2", + "-p", + platform, + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Memory level extension test failed for platform {platform}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +class TestMemoryAllocation: + """Test memory allocation strategies and constraints.""" + + def test_minimalloc_sufficient_memory(self): + """Test MiniMalloc strategy with sufficient L2 memory.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testMVP.py"), + "-t", + "Tests/Models/CCT/FP32/CCT_1_16_16_8", + "-p", + "Siracusa", + "--defaultMemLevel=L2", + "--l1=64000", + "--l2=75000", + "--memAllocStrategy=MiniMalloc", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Memory allocation test (MiniMalloc, L2=75000) failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_minimalloc_insufficient_memory(self): + """Test that MiniMalloc correctly fails with 
insufficient L2 memory.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testMVP.py"), + "-t", + "Tests/Models/CCT/FP32/CCT_1_16_16_8", + "-p", + "Siracusa", + "--defaultMemLevel=L2", + "--l1=64000", + "--l2=60000", + "--memAllocStrategy=MiniMalloc", + "--shouldFail", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, ( + f"Memory allocation test (MiniMalloc should fail, L2=60000) did not behave as expected\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_tetrisrandom_sufficient_memory(self): + """Test TetrisRandom strategy with sufficient L2 memory.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testMVP.py"), + "-t", + "Tests/Models/CCT/FP32/CCT_1_16_16_8", + "-p", + "Siracusa", + "--defaultMemLevel=L2", + "--l1=64000", + "--l2=90000", + "--memAllocStrategy=TetrisRandom", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Memory allocation test (TetrisRandom, L2=90000) failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_tetrisrandom_insufficient_memory(self): + """Test that TetrisRandom correctly fails with insufficient L2 memory.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testMVP.py"), + "-t", + "Tests/Models/CCT/FP32/CCT_1_16_16_8", + "-p", + "Siracusa", + "--defaultMemLevel=L2", + "--l1=64000", + "--l2=75000", + "--memAllocStrategy=TetrisRandom", + "--shouldFail", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, ( + f"Memory allocation test (TetrisRandom should fail, L2=75000) did not behave as expected\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +class TestTilerExtension: + """Test tiling extension functionality.""" + + @pytest.mark.parametrize("test_path", [ + "./Tests/Models/CNN_Linear2", + "./Tests/Models/CNN_Linear1", + "./Tests/Kernels/Integer/MatMul/Regular", + "./Tests/Kernels/Integer/MaxPool", + ]) + def test_tiler_basic(self, test_path): + """Test that tiler can process various networks without L1 constraints.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testTilerExtension.py"), + "-p", + "Siracusa", + "-t", + test_path, + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Tiler extension test failed for {test_path}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + @pytest.mark.parametrize("test_path", [ + "./Tests/Models/CNN_Linear2", + "./Tests/Models/CNN_Linear1", + "./Tests/Kernels/Integer/MatMul/Regular", + "./Tests/Kernels/Integer/MaxPool", + ]) + def test_tiler_constrained_should_fail(self, test_path): + """Test that tiler correctly fails when L1 memory is too small.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testTilerExtension.py"), + "-p", + "Siracusa", + "-t", + test_path, + "--l1", + "2000", + "--shouldFail", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, ( + f"Tiler extension test (should fail) did not behave as expected for {test_path}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + @pytest.mark.parametrize("test_path", [ + "./Tests/Models/CNN_Linear2", + "./Tests/Models/CNN_Linear1", + 
"./Tests/Models/miniMobileNet", + "./Tests/Models/miniMobileNetv2", + "./Tests/Kernels/Integer/MatMul/Regular", + "./Tests/Kernels/Integer/MaxPool", + ]) + def test_tiler_double_buffer(self, test_path): + """Test tiler with double buffering enabled.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testTilerExtension.py"), + "-p", + "Siracusa", + "-t", + test_path, + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Tiler extension test (double buffer) failed for {test_path}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +def test_types(): + """Test Deeploy type system (serialization, equivalence, promotion).""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testTypes.py"), + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Types test failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +class TestDebugTransformations: + """Test debug and diagnostic transformations.""" + + @pytest.mark.parametrize("platform", ["Generic", "Siracusa"]) + def test_print_input_output_transformation(self, platform): + """Test print input/output transformation for debugging.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testPrintInputOutputTransformation.py"), + "-p", + platform, + "-t", + "./Tests/Models/CNN_Linear2", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Print I/O transformation test failed for platform {platform}\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_debug_print_pass(self): + """Test debug print pass transformation.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testDebugPrintPass.py"), + "-p", + "Generic", + "-t", + "./Tests/Models/CNN_Linear2", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Debug print pass test failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +def test_regex_matching(): + """Test regex matching utilities.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "testRegexMatching.py"), + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Regex matching test failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + +class TestTypeInference: + """Test type inference functionality with different input type configurations.""" + + def test_type_inference_fail_all_int8(self): + """Test that type inference correctly fails when all inputs are int8.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "generateNetwork.py"), + "-p", + "Generic", + "-t", + "./Tests/Others/TypeInference", + "-v", + "--input-type-map", + "A=int8_t", + "B=int8_t", + "C=int8_t", + "--input-offset-map", + "A=0", + "B=0", + "C=0", + "--shouldFail", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Type inference test (should fail with all int8) did not behave as expected\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_type_inference_fail_incompatible_output(self): + """Test that type inference correctly fails with incompatible 
output type.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "generateNetwork.py"), + "-p", + "Generic", + "-t", + "./Tests/Others/TypeInference", + "-v", + "--input-type-map", + "A=int16_t", + "B=int8_t", + "C=int16_t", + "--input-offset-map", + "A=0", + "B=0", + "C=0", + "--shouldFail", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, ( + f"Type inference test (should fail with incompatible output) did not behave as expected\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") + + def test_type_inference_pass(self): + """Test that type inference succeeds with correct type configuration.""" + script_dir = Path(__file__).parent + cmd = [ + "python", + str(script_dir / "generateNetwork.py"), + "-p", + "Generic", + "-t", + "./Tests/Others/TypeInference", + "-v", + "--input-type-map", + "A=int16_t", + "B=int8_t", + "C=int32_t", + "--input-offset-map", + "A=0", + "B=0", + "C=0", + ] + result = subprocess.run(cmd, cwd = script_dir, capture_output = True, text = True) + + assert result.returncode == 0, (f"Type inference test (should pass) failed\n" + f"stdout: {result.stdout}\n" + f"stderr: {result.stderr}") diff --git a/DeeployTest/test_dmas.py b/DeeployTest/test_dmas.py index 96c72fd919..938459ae62 100644 --- a/DeeployTest/test_dmas.py +++ b/DeeployTest/test_dmas.py @@ -222,7 +222,7 @@ def setup_dma_deployer(dma_type: str, input_shape: tuple, tile_shape: tuple, nod return deployer, test_inputs, test_outputs -@pytest.mark.dma +@pytest.mark.deeploy_internal @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, @@ -272,7 +272,7 @@ def test_mchan_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolch assert result.error_count == 0, f"Found {result.error_count} errors" -@pytest.mark.dma +@pytest.mark.deeploy_internal @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, @@ -322,7 +322,7 @@ def test_l3_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain assert result.error_count == 0, f"Found {result.error_count} errors" -@pytest.mark.dma +@pytest.mark.deeploy_internal @pytest.mark.parametrize("test_shape", DMA_TEST_SHAPES, ids = param_id_dma) @pytest.mark.parametrize("doublebuffer", [True, False], ids = param_id_dma) def test_snitch_dma(test_shape, doublebuffer, deeploy_test_dir, toolchain, toolchain_dir, cmake_args, skipgen, From 7b049026a94f74af5c7b4b1e55bca0fa92f8acbb Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 12 Jan 2026 11:07:26 +0100 Subject: [PATCH 42/51] Improve DeeployTest Readme --- DeeployTest/README.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/DeeployTest/README.md b/DeeployTest/README.md index b5db1988b0..ac56399e62 100644 --- a/DeeployTest/README.md +++ b/DeeployTest/README.md @@ -11,11 +11,17 @@ The test suite is located in the `DeeployTest` folder, all commands below are as @pytest.mark.mempool: mark test as a MemPool platform test ``` -You can run all test from a given mark group with `pytest -m -v`. 
Each platform has a given marker, if you want to run all tests from the generic platform, you can use `pytest -m generic -v`. +You can run all tests from a given mark group with `pytest -m <marker> -v`. Each platform has a given marker; if you want to run all tests from the generic platform, you can use: +``` +pytest -m generic -v +``` -You can use boolean expressions on the markers to execute unions or intersections of markers. For instance, to execute only the kernel tests from the generic platform, one can use `pytest -m 'generic and kernels' -v`. +You can use boolean expressions on the markers to execute unions or intersections of markers. For instance, to execute only the kernel tests from the generic platform, one can use: +``` +pytest -m 'generic and kernels' -v +``` -To display the tests captured by a given marker or expression, you can use the `--collect-only` flag. For instance, to list the kernel tests on the Siracusa with Neureka platform that are from L2 and single-buffered, I can use `pytest -m 'siracusa_neureka_tiled and kernels and l2 and singlebuffer' -v --collect-only`, which returns: +To display the tests captured by a given marker or expression, you can use the `--collect-only` flag. For instance, to list the kernel tests on the Siracusa with Neureka platform that are from L2 and single-buffered, you can use `pytest -m 'siracusa_neureka_tiled and kernels and l2 and singlebuffer' -v --collect-only`, which returns: ``` platform linux -- Python 3.10.0, pytest-9.0.2, pluggy-1.6.0 -- /usr/scratch/normandie/jungvi/micromamba/envs/deeploy/bin/python3.10 @@ -39,7 +45,10 @@ collected 378 items / 370 deselected / 8 selected ### Executing a Single Test -To run a single test, one can use the test identifier from the `--collect-only` output, for instance `pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]' -v`. 
+To run a single test, one can use the test identifier from the `--collect-only` output, for instance: +``` +pytest 'test_platforms.py::test_siracusa_neureka_tiled_kernels_l2_singlebuffer[testRequantizedLinear-16000-L2-singlebuffer]' -v +``` ### Controlling Test Verbosity From 0cd8ce2ec46a8d2e2d3b4e6195f4fd846d700cec Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 12 Jan 2026 11:30:22 +0100 Subject: [PATCH 43/51] Apply code rabbit suggestions --- .github/workflows/_runner-siracusa.yml | 6 +++--- .github/workflows/ci-platform-siracusa.yml | 4 ++-- DeeployTest/test_platforms.py | 15 --------------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/.github/workflows/_runner-siracusa.yml b/.github/workflows/_runner-siracusa.yml index eb9a098b5f..98425303da 100644 --- a/.github/workflows/_runner-siracusa.yml +++ b/.github/workflows/_runner-siracusa.yml @@ -14,10 +14,10 @@ name: _runner-siracusa docker-image: required: true type: string - test-type: + pytest-marker: required: true type: string - description: "Type of tests to run: kernels or models" + description: "Pytest marker for test selection (e.g., 'kernels', 'models')" jobs: test-runner-siracusa: @@ -44,5 +44,5 @@ jobs: export CCACHE_DIR=/app/.ccache # Run tests using pytest markers - pytest test_platforms.py::test_siracusa_${{ inputs.test-type }} -v -s + pytest test_platforms.py -m -v -n 4 "siracusa and ${{ inputs.pytest-marker }}" shell: bash diff --git a/.github/workflows/ci-platform-siracusa.yml b/.github/workflows/ci-platform-siracusa.yml index 9cec1ef896..8e102cdc78 100644 --- a/.github/workflows/ci-platform-siracusa.yml +++ b/.github/workflows/ci-platform-siracusa.yml @@ -35,7 +35,7 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-type: kernels + pytest-marker: kernels siracusa-models: needs: select-env @@ -43,4 +43,4 @@ jobs: with: runner: ${{ needs.select-env.outputs.runner }} docker-image: ${{ needs.select-env.outputs.image }} - test-type: models + pytest-marker: models diff --git a/DeeployTest/test_platforms.py b/DeeployTest/test_platforms.py index 32fece84ec..3176d9af5b 100644 --- a/DeeployTest/test_platforms.py +++ b/DeeployTest/test_platforms.py @@ -304,21 +304,6 @@ def test_siracusa_tiled_kernels_l2_singlebuffer(test_params, deeploy_test_dir, t double_buffer = False, ) run_and_assert_test(test_name, config, skipgen, skipsim) - config = create_test_config( - test_name = test_name, - platform = "Siracusa", - simulator = "gvsoc", - deeploy_test_dir = deeploy_test_dir, - toolchain = toolchain, - toolchain_dir = toolchain_dir, - cmake_args = cmake_args, - tiling = True, - cores = SIRACUSA_DEFAULT_CORES, - l1 = l1, - default_mem_level = "L2", - double_buffer = False, - ) - run_and_assert_test(test_name, config, skipgen, skipsim) @pytest.mark.siracusa_tiled From 6166db112a38b526ada871c96cd29fbb4fa0e4cc Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Mon, 12 Jan 2026 13:24:42 +0100 Subject: [PATCH 44/51] Fix typo --- .github/workflows/_runner-siracusa.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_runner-siracusa.yml b/.github/workflows/_runner-siracusa.yml index 98425303da..ea8fe5d405 100644 --- a/.github/workflows/_runner-siracusa.yml +++ b/.github/workflows/_runner-siracusa.yml @@ -44,5 +44,5 @@ jobs: export CCACHE_DIR=/app/.ccache # Run tests using pytest markers - pytest test_platforms.py -m -v -n 4 "siracusa and ${{ inputs.pytest-marker }}" + pytest test_platforms.py -v -n 4 -m "siracusa and 
${{ inputs.pytest-marker }}" shell: bash From 39ea2ce313d8e166634e84ccc929ef1c2456c8c2 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 10:27:32 +0100 Subject: [PATCH 45/51] Centralize runner logic for test suite and CLI --- DeeployTest/deeployRunner_cortexm.py | 12 + DeeployTest/deeployRunner_generic.py | 12 + DeeployTest/deeployRunner_siracusa.py | 18 + DeeployTest/deeployRunner_snitch.py | 18 + DeeployTest/deeployRunner_tiled_siracusa.py | 18 + .../deeployRunner_tiled_siracusa_w_neureka.py | 20 + DeeployTest/deeployRunner_tiled_snitch.py | 18 + DeeployTest/testUtils/core/__init__.py | 19 + DeeployTest/testUtils/core/config.py | 33 ++ DeeployTest/testUtils/core/execution.py | 220 ++++++++++ DeeployTest/testUtils/core/output_parser.py | 49 +++ DeeployTest/testUtils/core/paths.py | 48 +++ DeeployTest/testUtils/deeployRunner.py | 403 ++++++++++++++++++ DeeployTest/testUtils/pytestRunner.py | 380 +---------------- 14 files changed, 891 insertions(+), 377 deletions(-) create mode 100644 DeeployTest/deeployRunner_cortexm.py create mode 100644 DeeployTest/deeployRunner_generic.py create mode 100644 DeeployTest/deeployRunner_siracusa.py create mode 100644 DeeployTest/deeployRunner_snitch.py create mode 100644 DeeployTest/deeployRunner_tiled_siracusa.py create mode 100644 DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py create mode 100644 DeeployTest/deeployRunner_tiled_snitch.py create mode 100644 DeeployTest/testUtils/core/__init__.py create mode 100644 DeeployTest/testUtils/core/config.py create mode 100644 DeeployTest/testUtils/core/execution.py create mode 100644 DeeployTest/testUtils/core/output_parser.py create mode 100644 DeeployTest/testUtils/core/paths.py create mode 100644 DeeployTest/testUtils/deeployRunner.py diff --git a/DeeployTest/deeployRunner_cortexm.py b/DeeployTest/deeployRunner_cortexm.py new file mode 100644 index 0000000000..f6f999a073 --- /dev/null +++ b/DeeployTest/deeployRunner_cortexm.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +import sys +from testUtils.deeployRunner import main + + +if __name__ == "__main__": + sys.exit(main(default_platform="QEMU-ARM", default_simulator="qemu", tiling_enabled=False)) diff --git a/DeeployTest/deeployRunner_generic.py b/DeeployTest/deeployRunner_generic.py new file mode 100644 index 0000000000..e239921525 --- /dev/null +++ b/DeeployTest/deeployRunner_generic.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +import sys +from testUtils.deeployRunner import main + + +if __name__ == "__main__": + sys.exit(main(default_platform="Generic", default_simulator="host", tiling_enabled=False)) diff --git a/DeeployTest/deeployRunner_siracusa.py b/DeeployTest/deeployRunner_siracusa.py new file mode 100644 index 0000000000..f087a14981 --- /dev/null +++ b/DeeployTest/deeployRunner_siracusa.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add Siracusa-specific arguments + def setup_parser(parser): + parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="Siracusa", default_simulator="gvsoc", tiling_enabled=False, + 
parser_setup_callback=setup_parser)) + diff --git a/DeeployTest/deeployRunner_snitch.py b/DeeployTest/deeployRunner_snitch.py new file mode 100644 index 0000000000..88d34dc2a7 --- /dev/null +++ b/DeeployTest/deeployRunner_snitch.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add Snitch-specific arguments + def setup_parser(parser): + parser.add_argument('--num-cores', type=int, default=8, dest='num_cores', help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="Snitch", default_simulator="gvsoc", tiling_enabled=False, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_siracusa.py b/DeeployTest/deeployRunner_tiled_siracusa.py new file mode 100644 index 0000000000..29f2fbabc0 --- /dev/null +++ b/DeeployTest/deeployRunner_tiled_siracusa.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +import sys +from testUtils.deeployRunner import main + + +if __name__ == "__main__": + + # Define parser setup callback to add Siracusa-specific arguments + def setup_parser(parser): + parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="Siracusa", default_simulator="gvsoc", tiling_enabled=True, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py new file mode 100644 index 0000000000..fcd05282be --- /dev/null +++ b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add Siracusa+Neureka-specific arguments + def setup_parser(parser): + parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') + parser.add_argument('--neureka-wmem', action='store_true', help='Enable Neureka weight memory\n') + parser.add_argument('--enable-3x3', action='store_true', help='Enable 3x3 convolutions\n') + + sys.exit(main(default_platform="Siracusa_w_neureka", default_simulator="gvsoc", tiling_enabled=True, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_snitch.py b/DeeployTest/deeployRunner_tiled_snitch.py new file mode 100644 index 0000000000..06cf455a19 --- /dev/null +++ b/DeeployTest/deeployRunner_tiled_snitch.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add Snitch-specific arguments + def setup_parser(parser): + parser.add_argument('--num-cores', type=int, default=8, dest='num_cores', help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="Snitch", default_simulator="gvsoc", tiling_enabled=True, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/testUtils/core/__init__.py b/DeeployTest/testUtils/core/__init__.py new file mode 100644 index 
0000000000..053d678a9e --- /dev/null +++ b/DeeployTest/testUtils/core/__init__.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from .config import DeeployTestConfig +from .execution import (build_binary, configure_cmake, generate_network, run_complete_test, run_simulation) +from .output_parser import TestResult +from .paths import get_test_paths + +__all__ = [ + 'DeeployTestConfig', + 'TestResult', + 'get_test_paths', + 'generate_network', + 'configure_cmake', + 'build_binary', + 'run_simulation', + 'run_complete_test', +] diff --git a/DeeployTest/testUtils/core/config.py b/DeeployTest/testUtils/core/config.py new file mode 100644 index 0000000000..0c545e1b73 --- /dev/null +++ b/DeeployTest/testUtils/core/config.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import os +from dataclasses import dataclass +from typing import List, Literal, Optional + + +@dataclass +class DeeployTestConfig: + """Configuration for a single test case.""" + test_name: str + test_dir: str + platform: str + simulator: Literal['gvsoc', 'banshee', 'qemu', 'vsim', 'vsim.gui', 'host', 'none'] + tiling: bool + gen_dir: str + build_dir: str + toolchain: str = "LLVM" + toolchain_install_dir: Optional[str] = None + cmake_args: List[str] = None + gen_args: List[str] = None + verbose: int = 0 + debug: bool = False + + def __post_init__(self): + if self.cmake_args is None: + self.cmake_args = [] + if self.gen_args is None: + self.gen_args = [] + if self.toolchain_install_dir is None: + self.toolchain_install_dir = os.environ.get('LLVM_INSTALL_DIR') diff --git a/DeeployTest/testUtils/core/execution.py b/DeeployTest/testUtils/core/execution.py new file mode 100644 index 0000000000..4a00c2fc11 --- /dev/null +++ b/DeeployTest/testUtils/core/execution.py @@ -0,0 +1,220 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import os +import shutil +import subprocess +import sys +from pathlib import Path + +from Deeploy.Logging import DEFAULT_LOGGER as log + +from .config import DeeployTestConfig +from .output_parser import TestResult, parse_test_output + + +def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: + """ + Generate network code from ONNX model. 
+ + Raises: + RuntimeError: If network generation fails + """ + if skip: + log.info(f"Skipping network generation for {config.test_name}") + return + + script_dir = Path(__file__).parent.parent.parent + + if config.tiling: + generation_script = script_dir / "testMVP.py" + else: + generation_script = script_dir / "generateNetwork.py" + + cmd = [ + "python", + str(generation_script), + "-d", + config.gen_dir, + "-t", + config.test_dir, + "-p", + config.platform, + ] + + # Add verbosity flags + if config.verbose > 0: + cmd.append("-" + "v" * config.verbose) + + # Add debug flag + if config.debug: + cmd.append("--debug") + + # Add additional generation arguments + cmd.extend(config.gen_args) + + log.debug(f"[Execution] Generation command: {' '.join(cmd)}") + + result = subprocess.run(cmd, check=False) + + if result.returncode != 0: + log.error(f"Network generation failed with return code {result.returncode}") + raise RuntimeError(f"Network generation failed for {config.test_name}") + + +def configure_cmake(config: DeeployTestConfig) -> None: + + assert config.toolchain_install_dir is not None, \ + "LLVM_INSTALL_DIR environment variable not set" + + cmake_cmd = os.environ.get("CMAKE", "cmake") + if cmake_cmd == "cmake" and shutil.which("cmake") is None: + raise RuntimeError("CMake not found. Please install CMake or set CMAKE environment variable") + + # Build CMake command + cmd = [ + cmake_cmd, + f"-DTOOLCHAIN={config.toolchain}", + f"-DTOOLCHAIN_INSTALL_DIR={config.toolchain_install_dir}", + f"-DGENERATED_SOURCE={config.gen_dir}", + f"-Dplatform={config.platform}", + f"-DTESTNAME={config.test_name}", + f"-B{config.build_dir}", + ] + + for arg in config.cmake_args: + if not arg.startswith("-D"): + arg = "-D" + arg + cmd.append(arg) + + if config.simulator == 'banshee': + cmd.append("-Dbanshee_simulation=ON") + else: + cmd.append("-Dbanshee_simulation=OFF") + + if config.simulator == 'gvsoc': + cmd.append("-Dgvsoc_simulation=ON") + else: + cmd.append("-Dgvsoc_simulation=OFF") + + # Last argument is the source directory + script_dir = Path(__file__).parent.parent.parent + cmd.append(str(script_dir.parent)) + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + log.debug(f"[Execution] CMake command: {' '.join(cmd)}") + + result = subprocess.run(cmd, check=False, env=env) + + if result.returncode != 0: + log.error(f"CMake configuration failed with return code {result.returncode}") + raise RuntimeError(f"CMake configuration failed for {config.test_name}") + + +def build_binary(config: DeeployTestConfig) -> None: + + cmake_cmd = os.environ.get("CMAKE", "cmake") + + cmd = [ + cmake_cmd, + "--build", + config.build_dir, + "--target", + config.test_name, + ] + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + log.debug(f"[Execution] Build command: {' '.join(cmd)}") + + result = subprocess.run(cmd, check=False, env=env) + + if result.returncode != 0: + log.error(f"Build failed with return code {result.returncode}") + raise RuntimeError(f"Build failed for {config.test_name}") + + +def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: + """ + Run simulation and parse output. 
+ + Raises: + RuntimeError: If simulation cannot be executed + """ + if skip: + log.info(f"Skipping simulation for {config.test_name}") + return TestResult(success=True, error_count=0, total_count=0, stdout="Skipped") + + if config.simulator == 'none': + raise RuntimeError("No simulator specified!") + + if config.simulator == 'host': + # Run binary directly + binary_path = Path(config.build_dir) / "bin" / config.test_name + cmd = [str(binary_path)] + else: + # Run via CMake target + cmake_cmd = os.environ.get("CMAKE", "cmake") + cmd = [ + cmake_cmd, + "--build", + config.build_dir, + "--target", + f"{config.simulator}_{config.test_name}", + ] + + env = os.environ.copy() + if config.verbose >= 3: + env["VERBOSE"] = "1" + + if config.simulator == 'banshee': + if config.verbose == 1: + env["BANSHEE_LOG"] = "warn" + elif config.verbose == 2: + env["BANSHEE_LOG"] = "info" + elif config.verbose >= 3: + env["BANSHEE_LOG"] = "debug" + + log.debug(f"[Execution] Simulation command: {' '.join(cmd)}") + + result = subprocess.run(cmd, capture_output=True, text=True, env=env) + + if result.stdout: + print(result.stdout, end='') + if result.stderr: + print(result.stderr, end='', file=sys.stderr) + + # Parse output for error count and cycles + test_result = parse_test_output(result.stdout, result.stderr) + + if not test_result.success and test_result.error_count == -1: + log.warning(f"Could not parse error count from output") + + return test_result + + +def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: bool = False) -> TestResult: + """ + Run a complete test: generate, configure, build, and simulate. + """ + log.info(f"################## Testing {config.test_name} on {config.platform} Platform ##################") + + # Step 1: Generate network + generate_network(config, skip=skipgen) + + # Step 2: Configure CMake + configure_cmake(config) + + # Step 3: Build binary + build_binary(config) + + # Step 4: Run simulation + result = run_simulation(config, skip=skipsim) + + return result diff --git a/DeeployTest/testUtils/core/output_parser.py b/DeeployTest/testUtils/core/output_parser.py new file mode 100644 index 0000000000..9e32a291a6 --- /dev/null +++ b/DeeployTest/testUtils/core/output_parser.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import re +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class TestResult: + success: bool + error_count: int + total_count: int + stdout: str + stderr: str = "" + runtime_cycles: Optional[int] = None + + +def parse_test_output(stdout: str, stderr: str = "") -> TestResult: + + output = stdout + stderr + + # Look for "Errors: X out of Y" pattern + error_match = re.search(r'Errors:\s*(\d+)\s*out\s*of\s*(\d+)', output) + + if error_match: + error_count = int(error_match.group(1)) + total_count = int(error_match.group(2)) + success = (error_count == 0) + else: + # Could not parse output - treat as failure + error_count = -1 + total_count = -1 + success = False + + runtime_cycles = None + cycle_match = re.search(r'Runtime:\s*(\d+)\s*cycles', output) + if cycle_match: + runtime_cycles = int(cycle_match.group(1)) + + return TestResult( + success=success, + error_count=error_count, + total_count=total_count, + stdout=stdout, + stderr=stderr, + runtime_cycles=runtime_cycles, + ) diff --git a/DeeployTest/testUtils/core/paths.py b/DeeployTest/testUtils/core/paths.py new file mode 100644 index 0000000000..016924d1de --- /dev/null +++ 
b/DeeployTest/testUtils/core/paths.py @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path +from typing import Optional, Tuple + +from Deeploy.Logging import DEFAULT_LOGGER as log + + +def get_test_paths(test_dir: str, platform: str, base_dir: Optional[str] = None) -> Tuple[str, str, str]: + """ + Resolve test paths for generation and build directories. + + Args: + test_dir: Path to test directory (e.g., "Tests/Adder" or absolute path) + platform: Platform name (e.g., "Generic") + base_dir: Base directory for tests (defaults to DeeployTest/) + + Returns: + Tuple of (gen_dir, test_dir_abs, test_name) + """ + if base_dir is None: + # Get the absolute path of this script's parent directory (core -> testUtils -> DeeployTest) + script_path = Path(__file__).resolve() + base_dir = script_path.parent.parent.parent + else: + base_dir = Path(base_dir) + + test_path = Path(test_dir) + if not test_path.is_absolute(): + test_path = base_dir / test_dir + + test_path = test_path.resolve() + test_name = test_path.name + + gen_dir_name = f"TEST_{platform.upper()}" + + # Check if path is inside base_dir + try: + rel_path = test_path.relative_to(base_dir) + gen_dir = base_dir / gen_dir_name / rel_path + except ValueError: + # Path is outside base_dir + gen_dir = base_dir / gen_dir_name / test_name + log.warning(f"Test path {test_path} is outside base directory. Using {gen_dir}") + + return str(gen_dir), str(test_path), test_name diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py new file mode 100644 index 0000000000..273b681762 --- /dev/null +++ b/DeeployTest/testUtils/deeployRunner.py @@ -0,0 +1,403 @@ +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +import argparse +import codecs +import os +import sys +import coloredlogs +from pathlib import Path +from typing import Optional + +from Deeploy.Logging import DEFAULT_FMT, DETAILED_FILE_LOG_FORMAT +from Deeploy.Logging import DEFAULT_LOGGER as log + +from .core import DeeployTestConfig, run_complete_test +from .core.paths import get_test_paths + + +def cmake_str(arg_str): + return "-D" + codecs.decode(str(arg_str), 'unicode_escape') + + +class _ArgumentDefaultMetavarTypeFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.MetavarTypeHelpFormatter): + + def __init__(self, prog: str, indent_increment: int = 2, max_help_position: int = 100, width=None) -> None: + super().__init__(prog, indent_increment, max_help_position, width) + + +class DeeployRunnerArgumentParser(argparse.ArgumentParser): + + def __init__(self, tiling_arguments: bool, description: Optional[str] = None, platform_required: bool = True, + allow_extra_args: bool = False): + formatter = _ArgumentDefaultMetavarTypeFormatter + + if description is None: + super().__init__(description="Deeploy Code Generation and Test Utility.", formatter_class=formatter) + else: + super().__init__(description=description, formatter_class=formatter) + + self.allow_extra_args = allow_extra_args + + self.tiling_arguments = tiling_arguments + + self.add_argument('-t', + metavar='', + dest='dir', + type=str, + required=True, + help='Test directory (e.g., Tests/Kernels/Integer/Add/Regular)\n') + self.add_argument('-p', + metavar='', + dest='platform', + type=str, + required=platform_required, + default=None, + help='Target platform (e.g., Generic, QEMU-ARM, Siracusa, Snitch)\n') + self.add_argument('-s', + 
metavar='', + dest='simulator', + type=str, + default=None, + help='Simulator to use (gvsoc, banshee, qemu, vsim, host, none)\n') + self.add_argument('-v', action='count', dest='verbose', default=0, help='Increase verbosity level\n') + self.add_argument('-D', + dest='cmake', + action='extend', + nargs="*", + type=cmake_str, + help="Create or update a cmake cache entry\n") + self.add_argument('--debug', + dest='debug', + action='store_true', + default=False, + help='Enable debugging mode\n') + self.add_argument('--skipgen', + dest='skipgen', + action='store_true', + default=False, + help='Skip network generation (reuse existing generated code)\n') + self.add_argument('--skipsim', + dest='skipsim', + action='store_true', + default=False, + help='Skip simulation (build only)\n') + self.add_argument('--toolchain', + metavar='', + dest='toolchain', + type=str, + default="LLVM", + help='Compiler toolchain\n') + self.add_argument('--toolchain-install-dir', + metavar='', + dest='toolchain_install_dir', + type=str, + default=os.environ.get('LLVM_INSTALL_DIR'), + help='Toolchain installation directory\n') + self.add_argument('--input-type-map', + nargs='*', + default=[], + type=str, + help='(Optional) mapping of input names to data types. ' + 'Example: --input-type-map input_0=int8_t input_1=float32_t\n') + self.add_argument('--input-offset-map', + nargs='*', + default=[], + type=str, + help='(Optional) mapping of input names to offsets. ' + 'Example: --input-offset-map input_0=0 input_1=128\n') + + if self.tiling_arguments: + self.add_argument('--defaultMemLevel', + metavar='', + dest='defaultMemLevel', + type=str, + default="L2", + help='Default memory level (L2 or L3)\n') + self.add_argument('--doublebuffer', + action='store_true', + help='Enable double buffering\n') + self.add_argument('--l1', + metavar='', + dest='l1', + type=int, + default=64000, + help='L1 size in bytes\n') + self.add_argument('--l2', + metavar='', + dest='l2', + type=int, + default=1024000, + help='L2 size in bytes\n') + self.add_argument('--randomizedMemoryScheduler', + action="store_true", + help='Enable randomized memory scheduler\n') + self.add_argument('--profileTiling', + action='store_true', + help='Enable tiling profiling\n') + self.add_argument('--memAllocStrategy', + metavar='', + dest='memAllocStrategy', + type=str, + default="MiniMalloc", + help='Memory allocation strategy: TetrisRandom, TetrisCo-Opt, MiniMalloc\n') + self.add_argument('--searchStrategy', + metavar='', + dest='searchStrategy', + type=str, + default="random-max", + help='CP solver search strategy: random-max, max, min\n') + self.add_argument('--plotMemAlloc', + action='store_true', + help='Plot memory allocation and save in deeployState folder\n') + + self.args = None + + def parse_args(self, args=None, namespace=None) -> argparse.Namespace: + + self.args = super().parse_args(args, namespace) + + if self.args.verbose > 2: + coloredlogs.install(level='DEBUG', logger=log, fmt=DETAILED_FILE_LOG_FORMAT) + elif self.args.verbose > 1: + coloredlogs.install(level='DEBUG', logger=log, fmt=DEFAULT_FMT) + elif self.args.verbose > 0: + coloredlogs.install(level='INFO', logger=log, fmt=DEFAULT_FMT) + else: + coloredlogs.install(level='WARNING', logger=log, fmt=DEFAULT_FMT) + + return self.args + + +def create_config_from_args(args: argparse.Namespace, platform: str, simulator: str, tiling: bool, + platform_specific_cmake_args: Optional[list] = None) -> DeeployTestConfig: + + script_path = Path(__file__).resolve() + base_dir = script_path.parent.parent + + 
test_dir = args.dir + gen_dir, test_dir_abs, test_name = get_test_paths(test_dir, platform, base_dir=str(base_dir)) + + build_dir = str(base_dir / f"TEST_{platform.upper()}" / "build") + + cmake_args_list = list(args.cmake) if args.cmake else [] + + # Add platform-specific CMake args + if platform_specific_cmake_args: + cmake_args_list.extend(platform_specific_cmake_args) + + # Prepare generation args + gen_args_list = [] + + if args.input_type_map: + gen_args_list.append("--input-type-map") + gen_args_list.extend(args.input_type_map) + if args.input_offset_map: + gen_args_list.append("--input-offset-map") + gen_args_list.extend(args.input_offset_map) + + if tiling: + if hasattr(args, 'defaultMemLevel') and args.defaultMemLevel: + gen_args_list.append(f"--defaultMemLevel={args.defaultMemLevel}") + if hasattr(args, 'doublebuffer') and args.doublebuffer: + gen_args_list.append("--doublebuffer") + if hasattr(args, 'l1') and args.l1: + gen_args_list.append(f"--l1={args.l1}") + if hasattr(args, 'l2') and args.l2 and args.l2 != 1024000: + gen_args_list.append(f"--l2={args.l2}") + if hasattr(args, 'randomizedMemoryScheduler') and args.randomizedMemoryScheduler: + gen_args_list.append("--randomizedMemoryScheduler") + if hasattr(args, 'profileTiling') and args.profileTiling: + gen_args_list.append("--profileTiling") + if hasattr(args, 'memAllocStrategy') and args.memAllocStrategy: + gen_args_list.append(f"--memAllocStrategy={args.memAllocStrategy}") + if hasattr(args, 'searchStrategy') and args.searchStrategy: + gen_args_list.append(f"--searchStrategy={args.searchStrategy}") + if hasattr(args, 'plotMemAlloc') and args.plotMemAlloc: + gen_args_list.append("--plotMemAlloc") + + config = DeeployTestConfig( + test_name=test_name, + test_dir=test_dir_abs, + platform=platform, + simulator=simulator, + tiling=tiling, + gen_dir=gen_dir, + build_dir=build_dir, + toolchain=args.toolchain, + toolchain_install_dir=args.toolchain_install_dir, + cmake_args=cmake_args_list, + gen_args=gen_args_list, + verbose=args.verbose, + debug=args.debug, + ) + + return config + + +def print_colored_result(result, test_name: str): + + GREEN = '\033[92m' + RED = '\033[91m' + RESET = '\033[0m' + + if result.success and result.error_count == 0: + print(f"\n{GREEN}✓ Test {test_name} PASSED - No errors found{RESET}") + if result.runtime_cycles is not None: + print(f"{GREEN} Runtime: {result.runtime_cycles} cycles{RESET}") + else: + print(f"\n{RED}✗ Test {test_name} FAILED - {result.error_count} errors out of {result.total_count}{RESET}") + if result.runtime_cycles is not None: + print(f"{RED} Runtime: {result.runtime_cycles} cycles{RESET}") + + +def print_configuration(config: DeeployTestConfig): + + CYAN = '\033[96m' + BOLD = '\033[1m' + RESET = '\033[0m' + + print(f"\n{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}") + print(f"{BOLD}{CYAN} Deeploy Test Configuration {RESET}") + print(f"{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}\n") + + print(f"{BOLD}Test Configuration:{RESET}") + print(f" Test Name : {config.test_name}") + print(f" Test Directory : {config.test_dir}") + print(f" Generation Directory: {config.gen_dir}") + print(f" Build Directory : {config.build_dir}") + + print(f"\n{BOLD}Platform Configuration:{RESET}") + print(f" Platform : {config.platform}") + print(f" Simulator : {config.simulator}") + print(f" Tiling Enabled : {'Yes' if config.tiling else 'No'}") + + print(f"\n{BOLD}Build Configuration:{RESET}") + print(f" Toolchain : 
{config.toolchain}") + if config.toolchain_install_dir: + print(f" Toolchain Directory : {config.toolchain_install_dir}") + if config.cmake_args: + print(f" CMake Arguments : {' '.join(config.cmake_args)}") + + print(f"\n{BOLD}Runtime Configuration:{RESET}") + print(f" Verbosity Level : {config.verbose}") + print(f" Debug Mode : {'Enabled' if config.debug else 'Disabled'}") + if config.gen_args: + print(f" Generation Arguments: {' '.join(config.gen_args)}") + + print(f"\n{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}\n") + + +def main(default_platform: Optional[str] = None, + default_simulator: Optional[str] = None, + tiling_enabled: bool = False, + platform_specific_cmake_args: Optional[list] = None, + parsed_args: Optional[argparse.Namespace] = None, + parser_setup_callback=None): + """ + Main entry point for Deeploy test runners. + + Args: + default_platform: Default platform if not specified via -p + default_simulator: Default simulator if not specified via -s + tiling_enabled: Whether tiling is enabled + platform_specific_cmake_args: Additional CMake arguments for platform-specific configurations + parsed_args: Pre-parsed arguments (if None, will parse from sys.argv) + parser_setup_callback: Optional callback to configure parser before parsing (receives parser as arg) + """ + + if parsed_args is None: + # Make -p optional if default_platform is provided + parser = DeeployRunnerArgumentParser(tiling_arguments=tiling_enabled, + platform_required=(default_platform is None)) + + # Allow platform-specific runners to add their own arguments + if parser_setup_callback: + parser_setup_callback(parser) + + args = parser.parse_args() + else: + args = parsed_args + + platform_map = { + "generic": "Generic", + "qemu-arm": "QEMU-ARM", + "mempool": "MemPool", + "siracusa": "Siracusa", + "siracusa_w_neureka": "Siracusa_w_neureka", + "snitch": "Snitch", + "chimera": "Chimera", + "softhier": "SoftHier", + } + + if args.platform: + platform = platform_map.get(args.platform.lower(), args.platform) + else: + platform = default_platform + + # Validate platform if default is provided + if default_platform and args.platform: + normalized_specified = platform_map.get(args.platform.lower(), args.platform) + if normalized_specified != default_platform: + RED = '\033[91m' + BOLD = '\033[1m' + RESET = '\033[0m' + print(f"\n{RED}{BOLD}ERROR: Platform mismatch!{RESET}", file=sys.stderr) + print(f"{RED}This runner is designed for the '{default_platform}' platform.{RESET}", file=sys.stderr) + print(f"{RED}You specified platform: '{args.platform}' (normalized to '{normalized_specified}'){RESET}\n", file=sys.stderr) + print(f"Please use one of the following options:", file=sys.stderr) + print(f" 1. Remove the '-p {args.platform}' argument to use the default platform", file=sys.stderr) + print(f" 2. 
Use the correct platform-specific runner script for '{normalized_specified}'", file=sys.stderr) + sys.exit(1) + + simulator = args.simulator if args.simulator else default_simulator + + if platform is None: + print("Error: Platform must be specified with -p or provided as default", file=sys.stderr) + sys.exit(1) + + if simulator is None: + simulator_map = { + "Generic": "host", + "QEMU-ARM": "qemu", + "MemPool": "banshee", + "Siracusa": "gvsoc", + "Siracusa_w_neureka": "gvsoc", + "Snitch": "gvsoc", + "Chimera": "gvsoc", + "SoftHier": "gvsoc", + } + simulator = simulator_map.get(platform, "host") + log.info(f"No simulator specified, using default for {platform}: {simulator}") + + # Extract platform-specific CMake args from parsed args if available + if platform_specific_cmake_args is None: + platform_specific_cmake_args = [] + + # Check for platform-specific arguments in args object and build CMake args + if hasattr(args, 'cores'): + platform_specific_cmake_args.append(f"-DNUM_CORES={args.cores}") + elif hasattr(args, 'num_cores'): + platform_specific_cmake_args.append(f"-DNUM_CORES={args.num_cores}") + + config = create_config_from_args(args, platform, simulator, tiling_enabled, platform_specific_cmake_args) + + print_configuration(config) + + try: + result = run_complete_test(config, skipgen=args.skipgen, skipsim=args.skipsim) + + print_colored_result(result, config.test_name) + + return 0 if result.success else 1 + + except Exception as e: + RED = '\033[91m' + RESET = '\033[0m' + print(f"\n{RED}✗ Test {config.test_name} FAILED with exception: {e}{RESET}") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 09d19da735..875de8113b 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -3,351 +3,10 @@ # SPDX-License-Identifier: Apache-2.0 import os -import re -import shutil -import subprocess -import sys -from dataclasses import dataclass from pathlib import Path -from typing import List, Literal, Optional, Tuple +from typing import List, Literal, Optional -from Deeploy.Logging import DEFAULT_LOGGER as log - - -@dataclass -class DeeployTestConfig: - """Configuration for a single test case.""" - test_name: str - test_dir: str - platform: str - simulator: Literal['gvsoc', 'banshee', 'qemu', 'vsim', 'vsim.gui', 'host', 'none'] - tiling: bool - gen_dir: str - build_dir: str - toolchain: str = "LLVM" - toolchain_install_dir: Optional[str] = None - cmake_args: List[str] = None - gen_args: List[str] = None - verbose: int = 0 - debug: bool = False - - def __post_init__(self): - if self.cmake_args is None: - self.cmake_args = [] - if self.gen_args is None: - self.gen_args = [] - if self.toolchain_install_dir is None: - self.toolchain_install_dir = os.environ.get('LLVM_INSTALL_DIR') - - -@dataclass -class TestResult: - """Results from running a test.""" - success: bool - error_count: int - total_count: int - stdout: str - stderr: str = "" - runtime_cycles: Optional[int] = None - - -def get_test_paths(test_dir: str, platform: str, base_dir: Optional[str] = None) -> Tuple[str, str, str]: - """ - Args: - test_dir: Path to test directory (e.g., "Tests/Adder" or absolute path) - platform: Platform name (e.g., "Generic") - base_dir: Base directory for tests (defaults to DeeployTest/) - - Returns: - Tuple of (gen_dir, test_dir_abs, test_name) - """ - if base_dir is None: - # Get the absolute path of this script's parent directory (testUtils -> DeeployTest) - 
script_path = Path(__file__).resolve() - base_dir = script_path.parent.parent - else: - base_dir = Path(base_dir) - - test_path = Path(test_dir) - if not test_path.is_absolute(): - test_path = base_dir / test_dir - - test_path = test_path.resolve() - test_name = test_path.name - - gen_dir_name = f"TEST_{platform.upper()}" - - # Check if path is inside base_dir - try: - rel_path = test_path.relative_to(base_dir) - gen_dir = base_dir / gen_dir_name / rel_path - except ValueError: - # Path is outside base_dir - gen_dir = base_dir / gen_dir_name / test_name - log.warning(f"Test path {test_path} is outside base directory. Using {gen_dir}") - - return str(gen_dir), str(test_path), test_name - - -def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: - """ - Args: - config: Test configuration - skip: If True, skip generation (useful for re-running tests) - - Raises: - RuntimeError: If network generation fails - """ - if skip: - log.info(f"Skipping network generation for {config.test_name}") - return - - script_dir = Path(__file__).parent.parent - - if config.tiling: - generation_script = script_dir / "testMVP.py" - else: - generation_script = script_dir / "generateNetwork.py" - - cmd = [ - "python", - str(generation_script), - "-d", - config.gen_dir, - "-t", - config.test_dir, - "-p", - config.platform, - ] - - # Add verbosity flags - if config.verbose > 0: - cmd.append("-" + "v" * config.verbose) - - # Add debug flag - if config.debug: - cmd.append("--debug") - - # Add additional generation arguments - cmd.extend(config.gen_args) - - log.debug(f"[pytestRunner] Generation command: {' '.join(cmd)}") - - result = subprocess.run(cmd, check = False) - - if result.returncode != 0: - log.error(f"Network generation failed with return code {result.returncode}") - raise RuntimeError(f"Network generation failed for {config.test_name}") - - -def configure_cmake(config: DeeployTestConfig) -> None: - """ - Args: - config: Test configuration - - Raises: - RuntimeError: If CMake configuration fails - """ - assert config.toolchain_install_dir is not None, \ - "LLVM_INSTALL_DIR environment variable not set" - - cmake_cmd = os.environ.get("CMAKE", "cmake") - if cmake_cmd == "cmake" and shutil.which("cmake") is None: - raise RuntimeError("CMake not found. 
Please install CMake or set CMAKE environment variable") - - # Build CMake command - cmd = [ - cmake_cmd, - f"-DTOOLCHAIN={config.toolchain}", - f"-DTOOLCHAIN_INSTALL_DIR={config.toolchain_install_dir}", - f"-DGENERATED_SOURCE={config.gen_dir}", - f"-Dplatform={config.platform}", - f"-DTESTNAME={config.test_name}", - f"-B{config.build_dir}", - ] - - # Add custom CMake arguments - for arg in config.cmake_args: - if not arg.startswith("-D"): - arg = "-D" + arg - cmd.append(arg) - - # Add simulator flags - if config.simulator == 'banshee': - cmd.append("-Dbanshee_simulation=ON") - else: - cmd.append("-Dbanshee_simulation=OFF") - - if config.simulator == 'gvsoc': - cmd.append("-Dgvsoc_simulation=ON") - else: - cmd.append("-Dgvsoc_simulation=OFF") - - # Last argument is the source directory - script_dir = Path(__file__).parent.parent - cmd.append(str(script_dir.parent)) - - env = os.environ.copy() - if config.verbose >= 3: - env["VERBOSE"] = "1" - - log.debug(f"[pytestRunner] CMake command: {' '.join(cmd)}") - - result = subprocess.run(cmd, check = False, env = env) - - if result.returncode != 0: - log.error(f"CMake configuration failed with return code {result.returncode}") - raise RuntimeError(f"CMake configuration failed for {config.test_name}") - - -def build_binary(config: DeeployTestConfig) -> None: - """ - Args: - config: Test configuration - - Raises: - RuntimeError: If build fails - """ - cmake_cmd = os.environ.get("CMAKE", "cmake") - - cmd = [ - cmake_cmd, - "--build", - config.build_dir, - "--target", - config.test_name, - ] - - env = os.environ.copy() - if config.verbose >= 3: - env["VERBOSE"] = "1" - - log.debug(f"[pytestRunner] Build command: {' '.join(cmd)}") - - result = subprocess.run(cmd, check = False, env = env) - - if result.returncode != 0: - log.error(f"Build failed with return code {result.returncode}") - raise RuntimeError(f"Build failed for {config.test_name}") - - -def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: - """ - Args: - config: Test configuration - skip: If True, skip simulation (useful for build-only tests) - - Returns: - TestResult with parsed output - - Raises: - RuntimeError: If simulation cannot be executed - """ - if skip: - log.info(f"Skipping simulation for {config.test_name}") - return TestResult(success = True, error_count = 0, total_count = 0, stdout = "Skipped") - - if config.simulator == 'none': - raise RuntimeError("No simulator specified!") - - if config.simulator == 'host': - # Run binary directly - binary_path = Path(config.build_dir) / "bin" / config.test_name - cmd = [str(binary_path)] - else: - # Run via CMake target - cmake_cmd = os.environ.get("CMAKE", "cmake") - cmd = [ - cmake_cmd, - "--build", - config.build_dir, - "--target", - f"{config.simulator}_{config.test_name}", - ] - - env = os.environ.copy() - if config.verbose >= 3: - env["VERBOSE"] = "1" - - # Add banshee-specific logging - if config.simulator == 'banshee': - if config.verbose == 1: - env["BANSHEE_LOG"] = "warn" - elif config.verbose == 2: - env["BANSHEE_LOG"] = "info" - elif config.verbose >= 3: - env["BANSHEE_LOG"] = "debug" - - log.debug(f"[pytestRunner] Simulation command: {' '.join(cmd)}") - - result = subprocess.run(cmd, capture_output = True, text = True, env = env) - - # Print captured output so it's visible when running with pytest -s - if result.stdout: - print(result.stdout, end = '') - if result.stderr: - print(result.stderr, end = '', file = sys.stderr) - - # Parse output for error count - output = result.stdout + result.stderr 
- - # Look for "Errors: X out of Y" pattern - error_match = re.search(r'Errors:\s*(\d+)\s*out\s*of\s*(\d+)', output) - - if error_match: - error_count = int(error_match.group(1)) - total_count = int(error_match.group(2)) - success = (error_count == 0) - else: - # Could not parse output - treat as failure - log.warning(f"Could not parse error count from output:\n{output}") - error_count = -1 - total_count = -1 - success = False - - # Try to parse runtime cycles - runtime_cycles = None - cycle_match = re.search(r'Runtime:\s*(\d+)\s*cycles', output) - if cycle_match: - runtime_cycles = int(cycle_match.group(1)) - - return TestResult( - success = success, - error_count = error_count, - total_count = total_count, - stdout = result.stdout, - stderr = result.stderr, - runtime_cycles = runtime_cycles, - ) - - -def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: bool = False) -> TestResult: - """ - Run a complete test: generate, configure, build, and simulate. - - Args: - config: Test configuration - skipgen: Skip network generation - skipsim: Skip simulation - - Returns: - TestResult with parsed output - """ - log.info(f"################## Testing {config.test_name} on {config.platform} Platform ##################") - - # Step 1: Generate network - generate_network(config, skip = skipgen) - - # Step 2: Configure CMake - configure_cmake(config) - - # Step 3: Build binary - build_binary(config) - - # Step 4: Run simulation - result = run_simulation(config, skip = skipsim) - - return result +from .core import DeeployTestConfig, get_test_paths, run_complete_test def get_worker_id() -> str: @@ -381,40 +40,13 @@ def create_test_config( randomized_mem_scheduler: bool = False, gen_args: Optional[List[str]] = None, ) -> DeeployTestConfig: - """ - Create DeeployTestConfig for a specific test and platform. - - Args: - test_name: Name of the test - platform: Target platform (e.g., "Generic", "QEMU-ARM", "Siracusa") - simulator: Simulator to use - deeploy_test_dir: Base DeeployTest directory - toolchain: Toolchain to use - LLVM/GCC - toolchain_dir: Path to toolchain installation - cmake_args: Additional CMake arguments - tiling: Whether to use tiling - cores: Number of cores (for Siracusa platforms) - l1: L1 memory size in bytes (for tiled platforms) - l2: L2 memory size in bytes (default: 1024000) - default_mem_level: Default memory level ("L2" or "L3") - double_buffer: Enable double buffering - mem_alloc_strategy: Memory allocation strategy - search_strategy: CP solver search strategy - profile_tiling: Enable tiling profiling - plot_mem_alloc: Enable memory allocation plotting - randomized_mem_scheduler: Enable randomized memory scheduler - - Returns: - DeeployTestConfig instance - """ + test_dir = f"Tests/{test_name}" gen_dir, test_dir_abs, test_name_clean = get_test_paths(test_dir, platform, base_dir = deeploy_test_dir) worker_id = get_worker_id() - # Build directory: shared per worker, not per test (for ccache efficiency) - # Only add worker suffix for parallel execution (worker_id != "master") if worker_id == "master": build_dir = str(Path(deeploy_test_dir) / f"TEST_{platform.upper()}" / "build_master") else: @@ -469,12 +101,6 @@ def create_test_config( def run_and_assert_test(test_name: str, config: DeeployTestConfig, skipgen: bool, skipsim: bool) -> None: """ Shared helper function to run a test and assert its results. 
- - Args: - test_name: Name of the test - config: DeeployTestConfig instance - skipgen: Whether to skip network generation - skipsim: Whether to skip simulation Raises: AssertionError: If test fails or has errors From c92e778efeba862947b8b1ec8ae8d03431d50899 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 10:58:39 +0100 Subject: [PATCH 46/51] Add deeploy runners for the other platforms --- DeeployTest/deeployRunner_chimera.py | 19 +++++++++++++++++++ DeeployTest/deeployRunner_mempool.py | 19 +++++++++++++++++++ DeeployTest/deeployRunner_softhier.py | 20 ++++++++++++++++++++ DeeployTest/testUtils/deeployRunner.py | 3 +++ 4 files changed, 61 insertions(+) create mode 100644 DeeployTest/deeployRunner_chimera.py create mode 100644 DeeployTest/deeployRunner_mempool.py create mode 100644 DeeployTest/deeployRunner_softhier.py diff --git a/DeeployTest/deeployRunner_chimera.py b/DeeployTest/deeployRunner_chimera.py new file mode 100644 index 0000000000..80a560b83e --- /dev/null +++ b/DeeployTest/deeployRunner_chimera.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Deeploy runner for Chimera platform.""" + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add Chimera-specific arguments + def setup_parser(parser): + parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="Chimera", default_simulator="gvsoc", tiling_enabled=False, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/deeployRunner_mempool.py b/DeeployTest/deeployRunner_mempool.py new file mode 100644 index 0000000000..ca9f22633c --- /dev/null +++ b/DeeployTest/deeployRunner_mempool.py @@ -0,0 +1,19 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Deeploy runner for MemPool platform.""" + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add MemPool-specific arguments + def setup_parser(parser): + parser.add_argument('--num-cores', type=int, default=16, dest='num_cores', help='Number of cores (default: 16)\n') + + sys.exit(main(default_platform="MemPool", default_simulator="banshee", tiling_enabled=False, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/deeployRunner_softhier.py b/DeeployTest/deeployRunner_softhier.py new file mode 100644 index 0000000000..c4d62fed70 --- /dev/null +++ b/DeeployTest/deeployRunner_softhier.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna +# +# SPDX-License-Identifier: Apache-2.0 + +"""Deeploy runner for SoftHier platform.""" + +from testUtils.deeployRunner import main +import sys + + +if __name__ == "__main__": + + # Define parser setup callback to add SoftHier-specific arguments + def setup_parser(parser): + parser.add_argument('--num-clusters', type=int, default=1, dest='num_clusters', help='Number of clusters (default: 1)\n') + parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') + + sys.exit(main(default_platform="SoftHier", default_simulator="gvsoc", tiling_enabled=False, + parser_setup_callback=setup_parser)) diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py index 273b681762..566c521360 100644 --- 
a/DeeployTest/testUtils/deeployRunner.py +++ b/DeeployTest/testUtils/deeployRunner.py @@ -380,6 +380,9 @@ def main(default_platform: Optional[str] = None, platform_specific_cmake_args.append(f"-DNUM_CORES={args.cores}") elif hasattr(args, 'num_cores'): platform_specific_cmake_args.append(f"-DNUM_CORES={args.num_cores}") + + if hasattr(args, 'num_clusters'): + platform_specific_cmake_args.append(f"-DNUM_CLUSTERS={args.num_clusters}") config = create_config_from_args(args, platform, simulator, tiling_enabled, platform_specific_cmake_args) From fff1d7715d538b8303bba3faa09eb8bc1666704b Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 11:00:27 +0100 Subject: [PATCH 47/51] Update README --- README.md | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 429bb920b7..a9f6251876 100644 --- a/README.md +++ b/README.md @@ -55,16 +55,16 @@ pip install -e . --extra-index-url=https://pypi.ngc.nvidia.com Congratulations, you installed Deeploy and its dependencies! Now, to test your installation let's run one simple test on each platform with the following commands: ``` cd DeeployTest -python testRunner_generic.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_cortexm.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_mempool.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_snitch.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_tiled_snitch.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_tiled_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_tiled_siracusa_w_neureka.py -t ./Tests/Kernels/Integer/Add/Regular -python testRunner_softhier.py -t ./Tests/Kernels/Integer/Add/Regular --toolchain=GCC -python testRunner_chimera.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_generic.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_cortexm.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_mempool.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_snitch.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_tiled_snitch.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_tiled_siracusa.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_tiled_siracusa_w_neureka.py -t ./Tests/Kernels/Integer/Add/Regular +python deeployRunner_softhier.py -t ./Tests/Kernels/Integer/Add/Regular --toolchain=GCC +python deeployRunner_chimera.py -t ./Tests/Kernels/Integer/Add/Regular ``` To restart and connect to the container, run: @@ -73,9 +73,9 @@ docker start -i deeploy_main cd Deeploy ``` -You can find the ONNX file in `DeeployTest/Tests/Adder`, to visualize it, you can use [Netron](https://netron.app/). You can also find the generated code for the platform X in `TEST_X` in `DeeployTest` and you should notice that the generated code for the `Adder` test is very simple. However, this gets more complex when you add tiling. Let's generate the code for a single layer but using tiling this time: +You can find the ONNX file in `DeeployTest/Tests/Kernels/Integer/Add/Regular`, to visualize it, you can use [Netron](https://netron.app/). You can also find the generated code for the platform X in `TEST_X` in `DeeployTest` and you should notice that the generated code for the `Add` test is very simple. However, this gets more complex when you add tiling. 
Let's generate the code for a single layer but using tiling this time: ``` -python testRunner_tiled_siracusa.py -t Tests/testMatMul --cores=8 --l1=16000 +python deeployRunner_tiled_siracusa.py -t Tests/Kernels/Integer/MatMul/Regular --cores=8 --l1=16000 ``` Now you can open the generated code in `DeeployTest/TEST_SIRACUSA/Tests/testMatMul/Network.c` and see how we executed a tiled layer. From 0eb56b9d0d02b19870e203229b5721410e1051df Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 11:59:49 +0100 Subject: [PATCH 48/51] Fix import --- DeeployTest/testUtils/pytestRunner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py index 875de8113b..36a1b88200 100644 --- a/DeeployTest/testUtils/pytestRunner.py +++ b/DeeployTest/testUtils/pytestRunner.py @@ -7,6 +7,7 @@ from typing import List, Literal, Optional from .core import DeeployTestConfig, get_test_paths, run_complete_test +from .core.execution import build_binary, configure_cmake, run_simulation def get_worker_id() -> str: From 5b7dd6346f8e52110a0bfea048510b655c8db2b5 Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 12:02:07 +0100 Subject: [PATCH 49/51] Format and lint --- DeeployTest/deeployRunner_chimera.py | 15 +- DeeployTest/deeployRunner_cortexm.py | 4 +- DeeployTest/deeployRunner_generic.py | 4 +- DeeployTest/deeployRunner_mempool.py | 19 +- DeeployTest/deeployRunner_siracusa.py | 13 +- DeeployTest/deeployRunner_snitch.py | 19 +- DeeployTest/deeployRunner_softhier.py | 21 +- DeeployTest/deeployRunner_tiled_siracusa.py | 15 +- .../deeployRunner_tiled_siracusa_w_neureka.py | 19 +- DeeployTest/deeployRunner_tiled_snitch.py | 19 +- DeeployTest/testUtils/core/execution.py | 18 +- DeeployTest/testUtils/core/output_parser.py | 12 +- DeeployTest/testUtils/deeployRunner.py | 280 +++++++++--------- 13 files changed, 241 insertions(+), 217 deletions(-) diff --git a/DeeployTest/deeployRunner_chimera.py b/DeeployTest/deeployRunner_chimera.py index 80a560b83e..1908c568a6 100644 --- a/DeeployTest/deeployRunner_chimera.py +++ b/DeeployTest/deeployRunner_chimera.py @@ -2,18 +2,19 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Deeploy runner for Chimera platform.""" from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add Chimera-specific arguments def setup_parser(parser): - parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="Chimera", default_simulator="gvsoc", tiling_enabled=False, - parser_setup_callback=setup_parser)) + parser.add_argument('--cores', type = int, default = 8, help = 'Number of cores (default: 8)\n') + + sys.exit( + main(default_platform = "Chimera", + default_simulator = "gvsoc", + tiling_enabled = False, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_cortexm.py b/DeeployTest/deeployRunner_cortexm.py index f6f999a073..b468abdeca 100644 --- a/DeeployTest/deeployRunner_cortexm.py +++ b/DeeployTest/deeployRunner_cortexm.py @@ -3,10 +3,8 @@ # # SPDX-License-Identifier: Apache-2.0 - import sys from testUtils.deeployRunner import main - if __name__ == "__main__": - sys.exit(main(default_platform="QEMU-ARM", default_simulator="qemu", tiling_enabled=False)) + sys.exit(main(default_platform = "QEMU-ARM", default_simulator = "qemu", tiling_enabled = False)) diff --git 
a/DeeployTest/deeployRunner_generic.py b/DeeployTest/deeployRunner_generic.py index e239921525..ad006d3bc4 100644 --- a/DeeployTest/deeployRunner_generic.py +++ b/DeeployTest/deeployRunner_generic.py @@ -3,10 +3,8 @@ # # SPDX-License-Identifier: Apache-2.0 - import sys from testUtils.deeployRunner import main - if __name__ == "__main__": - sys.exit(main(default_platform="Generic", default_simulator="host", tiling_enabled=False)) + sys.exit(main(default_platform = "Generic", default_simulator = "host", tiling_enabled = False)) diff --git a/DeeployTest/deeployRunner_mempool.py b/DeeployTest/deeployRunner_mempool.py index ca9f22633c..bbbb505648 100644 --- a/DeeployTest/deeployRunner_mempool.py +++ b/DeeployTest/deeployRunner_mempool.py @@ -2,18 +2,23 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Deeploy runner for MemPool platform.""" from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add MemPool-specific arguments def setup_parser(parser): - parser.add_argument('--num-cores', type=int, default=16, dest='num_cores', help='Number of cores (default: 16)\n') - - sys.exit(main(default_platform="MemPool", default_simulator="banshee", tiling_enabled=False, - parser_setup_callback=setup_parser)) + parser.add_argument('--num-cores', + type = int, + default = 16, + dest = 'num_cores', + help = 'Number of cores (default: 16)\n') + + sys.exit( + main(default_platform = "MemPool", + default_simulator = "banshee", + tiling_enabled = False, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_siracusa.py b/DeeployTest/deeployRunner_siracusa.py index f087a14981..9b36471b76 100644 --- a/DeeployTest/deeployRunner_siracusa.py +++ b/DeeployTest/deeployRunner_siracusa.py @@ -6,13 +6,14 @@ from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add Siracusa-specific arguments def setup_parser(parser): - parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="Siracusa", default_simulator="gvsoc", tiling_enabled=False, - parser_setup_callback=setup_parser)) + parser.add_argument('--cores', type = int, default = 8, help = 'Number of cores (default: 8)\n') + sys.exit( + main(default_platform = "Siracusa", + default_simulator = "gvsoc", + tiling_enabled = False, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_snitch.py b/DeeployTest/deeployRunner_snitch.py index 88d34dc2a7..be700591e9 100644 --- a/DeeployTest/deeployRunner_snitch.py +++ b/DeeployTest/deeployRunner_snitch.py @@ -3,16 +3,21 @@ # # SPDX-License-Identifier: Apache-2.0 - from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add Snitch-specific arguments def setup_parser(parser): - parser.add_argument('--num-cores', type=int, default=8, dest='num_cores', help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="Snitch", default_simulator="gvsoc", tiling_enabled=False, - parser_setup_callback=setup_parser)) + parser.add_argument('--num-cores', + type = int, + default = 8, + dest = 'num_cores', + help = 'Number of cores (default: 8)\n') + + sys.exit( + main(default_platform = "Snitch", + default_simulator = "gvsoc", + tiling_enabled = False, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_softhier.py 
b/DeeployTest/deeployRunner_softhier.py index c4d62fed70..e11469a2d6 100644 --- a/DeeployTest/deeployRunner_softhier.py +++ b/DeeployTest/deeployRunner_softhier.py @@ -2,19 +2,24 @@ # SPDX-FileCopyrightText: 2025 ETH Zurich and University of Bologna # # SPDX-License-Identifier: Apache-2.0 - """Deeploy runner for SoftHier platform.""" from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add SoftHier-specific arguments def setup_parser(parser): - parser.add_argument('--num-clusters', type=int, default=1, dest='num_clusters', help='Number of clusters (default: 1)\n') - parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="SoftHier", default_simulator="gvsoc", tiling_enabled=False, - parser_setup_callback=setup_parser)) + parser.add_argument('--num-clusters', + type = int, + default = 1, + dest = 'num_clusters', + help = 'Number of clusters (default: 1)\n') + parser.add_argument('--cores', type = int, default = 8, help = 'Number of cores (default: 8)\n') + + sys.exit( + main(default_platform = "SoftHier", + default_simulator = "gvsoc", + tiling_enabled = False, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_siracusa.py b/DeeployTest/deeployRunner_tiled_siracusa.py index 29f2fbabc0..01eea59858 100644 --- a/DeeployTest/deeployRunner_tiled_siracusa.py +++ b/DeeployTest/deeployRunner_tiled_siracusa.py @@ -3,16 +3,17 @@ # # SPDX-License-Identifier: Apache-2.0 - import sys from testUtils.deeployRunner import main - if __name__ == "__main__": - + # Define parser setup callback to add Siracusa-specific arguments def setup_parser(parser): - parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="Siracusa", default_simulator="gvsoc", tiling_enabled=True, - parser_setup_callback=setup_parser)) + parser.add_argument('--cores', type = int, default = 8, help = 'Number of cores (default: 8)\n') + + sys.exit( + main(default_platform = "Siracusa", + default_simulator = "gvsoc", + tiling_enabled = True, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py index fcd05282be..5624b6607e 100644 --- a/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py +++ b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py @@ -3,18 +3,19 @@ # # SPDX-License-Identifier: Apache-2.0 - from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add Siracusa+Neureka-specific arguments def setup_parser(parser): - parser.add_argument('--cores', type=int, default=8, help='Number of cores (default: 8)\n') - parser.add_argument('--neureka-wmem', action='store_true', help='Enable Neureka weight memory\n') - parser.add_argument('--enable-3x3', action='store_true', help='Enable 3x3 convolutions\n') - - sys.exit(main(default_platform="Siracusa_w_neureka", default_simulator="gvsoc", tiling_enabled=True, - parser_setup_callback=setup_parser)) + parser.add_argument('--cores', type = int, default = 8, help = 'Number of cores (default: 8)\n') + parser.add_argument('--neureka-wmem', action = 'store_true', help = 'Enable Neureka weight memory\n') + parser.add_argument('--enable-3x3', action = 'store_true', help = 'Enable 3x3 convolutions\n') + + sys.exit( + main(default_platform = "Siracusa_w_neureka", + default_simulator = "gvsoc", + 
tiling_enabled = True, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/deeployRunner_tiled_snitch.py b/DeeployTest/deeployRunner_tiled_snitch.py index 06cf455a19..60ee08c7bf 100644 --- a/DeeployTest/deeployRunner_tiled_snitch.py +++ b/DeeployTest/deeployRunner_tiled_snitch.py @@ -3,16 +3,21 @@ # # SPDX-License-Identifier: Apache-2.0 - from testUtils.deeployRunner import main import sys - if __name__ == "__main__": - + # Define parser setup callback to add Snitch-specific arguments def setup_parser(parser): - parser.add_argument('--num-cores', type=int, default=8, dest='num_cores', help='Number of cores (default: 8)\n') - - sys.exit(main(default_platform="Snitch", default_simulator="gvsoc", tiling_enabled=True, - parser_setup_callback=setup_parser)) + parser.add_argument('--num-cores', + type = int, + default = 8, + dest = 'num_cores', + help = 'Number of cores (default: 8)\n') + + sys.exit( + main(default_platform = "Snitch", + default_simulator = "gvsoc", + tiling_enabled = True, + parser_setup_callback = setup_parser)) diff --git a/DeeployTest/testUtils/core/execution.py b/DeeployTest/testUtils/core/execution.py index 4a00c2fc11..46ed86d303 100644 --- a/DeeployTest/testUtils/core/execution.py +++ b/DeeployTest/testUtils/core/execution.py @@ -56,7 +56,7 @@ def generate_network(config: DeeployTestConfig, skip: bool = False) -> None: log.debug(f"[Execution] Generation command: {' '.join(cmd)}") - result = subprocess.run(cmd, check=False) + result = subprocess.run(cmd, check = False) if result.returncode != 0: log.error(f"Network generation failed with return code {result.returncode}") @@ -108,7 +108,7 @@ def configure_cmake(config: DeeployTestConfig) -> None: log.debug(f"[Execution] CMake command: {' '.join(cmd)}") - result = subprocess.run(cmd, check=False, env=env) + result = subprocess.run(cmd, check = False, env = env) if result.returncode != 0: log.error(f"CMake configuration failed with return code {result.returncode}") @@ -133,7 +133,7 @@ def build_binary(config: DeeployTestConfig) -> None: log.debug(f"[Execution] Build command: {' '.join(cmd)}") - result = subprocess.run(cmd, check=False, env=env) + result = subprocess.run(cmd, check = False, env = env) if result.returncode != 0: log.error(f"Build failed with return code {result.returncode}") @@ -149,7 +149,7 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: """ if skip: log.info(f"Skipping simulation for {config.test_name}") - return TestResult(success=True, error_count=0, total_count=0, stdout="Skipped") + return TestResult(success = True, error_count = 0, total_count = 0, stdout = "Skipped") if config.simulator == 'none': raise RuntimeError("No simulator specified!") @@ -183,12 +183,12 @@ def run_simulation(config: DeeployTestConfig, skip: bool = False) -> TestResult: log.debug(f"[Execution] Simulation command: {' '.join(cmd)}") - result = subprocess.run(cmd, capture_output=True, text=True, env=env) + result = subprocess.run(cmd, capture_output = True, text = True, env = env) if result.stdout: - print(result.stdout, end='') + print(result.stdout, end = '') if result.stderr: - print(result.stderr, end='', file=sys.stderr) + print(result.stderr, end = '', file = sys.stderr) # Parse output for error count and cycles test_result = parse_test_output(result.stdout, result.stderr) @@ -206,7 +206,7 @@ def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: log.info(f"################## Testing {config.test_name} on {config.platform} Platform ##################") 
# Step 1: Generate network - generate_network(config, skip=skipgen) + generate_network(config, skip = skipgen) # Step 2: Configure CMake configure_cmake(config) @@ -215,6 +215,6 @@ def run_complete_test(config: DeeployTestConfig, skipgen: bool = False, skipsim: build_binary(config) # Step 4: Run simulation - result = run_simulation(config, skip=skipsim) + result = run_simulation(config, skip = skipsim) return result diff --git a/DeeployTest/testUtils/core/output_parser.py b/DeeployTest/testUtils/core/output_parser.py index 9e32a291a6..ffb91ce0a7 100644 --- a/DeeployTest/testUtils/core/output_parser.py +++ b/DeeployTest/testUtils/core/output_parser.py @@ -40,10 +40,10 @@ def parse_test_output(stdout: str, stderr: str = "") -> TestResult: runtime_cycles = int(cycle_match.group(1)) return TestResult( - success=success, - error_count=error_count, - total_count=total_count, - stdout=stdout, - stderr=stderr, - runtime_cycles=runtime_cycles, + success = success, + error_count = error_count, + total_count = total_count, + stdout = stdout, + stderr = stderr, + runtime_cycles = runtime_cycles, ) diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py index 566c521360..282e268a3b 100644 --- a/DeeployTest/testUtils/deeployRunner.py +++ b/DeeployTest/testUtils/deeployRunner.py @@ -23,166 +23,168 @@ def cmake_str(arg_str): class _ArgumentDefaultMetavarTypeFormatter(argparse.ArgumentDefaultsHelpFormatter, argparse.MetavarTypeHelpFormatter): - def __init__(self, prog: str, indent_increment: int = 2, max_help_position: int = 100, width=None) -> None: + def __init__(self, prog: str, indent_increment: int = 2, max_help_position: int = 100, width = None) -> None: super().__init__(prog, indent_increment, max_help_position, width) class DeeployRunnerArgumentParser(argparse.ArgumentParser): - def __init__(self, tiling_arguments: bool, description: Optional[str] = None, platform_required: bool = True, + def __init__(self, + tiling_arguments: bool, + description: Optional[str] = None, + platform_required: bool = True, allow_extra_args: bool = False): formatter = _ArgumentDefaultMetavarTypeFormatter if description is None: - super().__init__(description="Deeploy Code Generation and Test Utility.", formatter_class=formatter) + super().__init__(description = "Deeploy Code Generation and Test Utility.", formatter_class = formatter) else: - super().__init__(description=description, formatter_class=formatter) - + super().__init__(description = description, formatter_class = formatter) + self.allow_extra_args = allow_extra_args self.tiling_arguments = tiling_arguments self.add_argument('-t', - metavar='', - dest='dir', - type=str, - required=True, - help='Test directory (e.g., Tests/Kernels/Integer/Add/Regular)\n') + metavar = '', + dest = 'dir', + type = str, + required = True, + help = 'Test directory (e.g., Tests/Kernels/Integer/Add/Regular)\n') self.add_argument('-p', - metavar='', - dest='platform', - type=str, - required=platform_required, - default=None, - help='Target platform (e.g., Generic, QEMU-ARM, Siracusa, Snitch)\n') + metavar = '', + dest = 'platform', + type = str, + required = platform_required, + default = None, + help = 'Target platform (e.g., Generic, QEMU-ARM, Siracusa, Snitch)\n') self.add_argument('-s', - metavar='', - dest='simulator', - type=str, - default=None, - help='Simulator to use (gvsoc, banshee, qemu, vsim, host, none)\n') - self.add_argument('-v', action='count', dest='verbose', default=0, help='Increase verbosity level\n') + metavar = '', + dest = 
'simulator', + type = str, + default = None, + help = 'Simulator to use (gvsoc, banshee, qemu, vsim, host, none)\n') + self.add_argument('-v', action = 'count', dest = 'verbose', default = 0, help = 'Increase verbosity level\n') self.add_argument('-D', - dest='cmake', - action='extend', - nargs="*", - type=cmake_str, - help="Create or update a cmake cache entry\n") + dest = 'cmake', + action = 'extend', + nargs = "*", + type = cmake_str, + help = "Create or update a cmake cache entry\n") self.add_argument('--debug', - dest='debug', - action='store_true', - default=False, - help='Enable debugging mode\n') + dest = 'debug', + action = 'store_true', + default = False, + help = 'Enable debugging mode\n') self.add_argument('--skipgen', - dest='skipgen', - action='store_true', - default=False, - help='Skip network generation (reuse existing generated code)\n') + dest = 'skipgen', + action = 'store_true', + default = False, + help = 'Skip network generation (reuse existing generated code)\n') self.add_argument('--skipsim', - dest='skipsim', - action='store_true', - default=False, - help='Skip simulation (build only)\n') + dest = 'skipsim', + action = 'store_true', + default = False, + help = 'Skip simulation (build only)\n') self.add_argument('--toolchain', - metavar='', - dest='toolchain', - type=str, - default="LLVM", - help='Compiler toolchain\n') + metavar = '', + dest = 'toolchain', + type = str, + default = "LLVM", + help = 'Compiler toolchain\n') self.add_argument('--toolchain-install-dir', - metavar='', - dest='toolchain_install_dir', - type=str, - default=os.environ.get('LLVM_INSTALL_DIR'), - help='Toolchain installation directory\n') + metavar = '', + dest = 'toolchain_install_dir', + type = str, + default = os.environ.get('LLVM_INSTALL_DIR'), + help = 'Toolchain installation directory\n') self.add_argument('--input-type-map', - nargs='*', - default=[], - type=str, - help='(Optional) mapping of input names to data types. ' + nargs = '*', + default = [], + type = str, + help = '(Optional) mapping of input names to data types. ' 'Example: --input-type-map input_0=int8_t input_1=float32_t\n') self.add_argument('--input-offset-map', - nargs='*', - default=[], - type=str, - help='(Optional) mapping of input names to offsets. ' + nargs = '*', + default = [], + type = str, + help = '(Optional) mapping of input names to offsets. 
' 'Example: --input-offset-map input_0=0 input_1=128\n') if self.tiling_arguments: self.add_argument('--defaultMemLevel', - metavar='', - dest='defaultMemLevel', - type=str, - default="L2", - help='Default memory level (L2 or L3)\n') - self.add_argument('--doublebuffer', - action='store_true', - help='Enable double buffering\n') + metavar = '', + dest = 'defaultMemLevel', + type = str, + default = "L2", + help = 'Default memory level (L2 or L3)\n') + self.add_argument('--doublebuffer', action = 'store_true', help = 'Enable double buffering\n') self.add_argument('--l1', - metavar='', - dest='l1', - type=int, - default=64000, - help='L1 size in bytes\n') + metavar = '', + dest = 'l1', + type = int, + default = 64000, + help = 'L1 size in bytes\n') self.add_argument('--l2', - metavar='', - dest='l2', - type=int, - default=1024000, - help='L2 size in bytes\n') + metavar = '', + dest = 'l2', + type = int, + default = 1024000, + help = 'L2 size in bytes\n') self.add_argument('--randomizedMemoryScheduler', - action="store_true", - help='Enable randomized memory scheduler\n') - self.add_argument('--profileTiling', - action='store_true', - help='Enable tiling profiling\n') + action = "store_true", + help = 'Enable randomized memory scheduler\n') + self.add_argument('--profileTiling', action = 'store_true', help = 'Enable tiling profiling\n') self.add_argument('--memAllocStrategy', - metavar='', - dest='memAllocStrategy', - type=str, - default="MiniMalloc", - help='Memory allocation strategy: TetrisRandom, TetrisCo-Opt, MiniMalloc\n') + metavar = '', + dest = 'memAllocStrategy', + type = str, + default = "MiniMalloc", + help = 'Memory allocation strategy: TetrisRandom, TetrisCo-Opt, MiniMalloc\n') self.add_argument('--searchStrategy', - metavar='', - dest='searchStrategy', - type=str, - default="random-max", - help='CP solver search strategy: random-max, max, min\n') + metavar = '', + dest = 'searchStrategy', + type = str, + default = "random-max", + help = 'CP solver search strategy: random-max, max, min\n') self.add_argument('--plotMemAlloc', - action='store_true', - help='Plot memory allocation and save in deeployState folder\n') + action = 'store_true', + help = 'Plot memory allocation and save in deeployState folder\n') self.args = None - def parse_args(self, args=None, namespace=None) -> argparse.Namespace: - + def parse_args(self, args = None, namespace = None) -> argparse.Namespace: + self.args = super().parse_args(args, namespace) if self.args.verbose > 2: - coloredlogs.install(level='DEBUG', logger=log, fmt=DETAILED_FILE_LOG_FORMAT) + coloredlogs.install(level = 'DEBUG', logger = log, fmt = DETAILED_FILE_LOG_FORMAT) elif self.args.verbose > 1: - coloredlogs.install(level='DEBUG', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'DEBUG', logger = log, fmt = DEFAULT_FMT) elif self.args.verbose > 0: - coloredlogs.install(level='INFO', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'INFO', logger = log, fmt = DEFAULT_FMT) else: - coloredlogs.install(level='WARNING', logger=log, fmt=DEFAULT_FMT) + coloredlogs.install(level = 'WARNING', logger = log, fmt = DEFAULT_FMT) return self.args -def create_config_from_args(args: argparse.Namespace, platform: str, simulator: str, tiling: bool, +def create_config_from_args(args: argparse.Namespace, + platform: str, + simulator: str, + tiling: bool, platform_specific_cmake_args: Optional[list] = None) -> DeeployTestConfig: script_path = Path(__file__).resolve() base_dir = script_path.parent.parent test_dir = args.dir - gen_dir, 
test_dir_abs, test_name = get_test_paths(test_dir, platform, base_dir=str(base_dir)) + gen_dir, test_dir_abs, test_name = get_test_paths(test_dir, platform, base_dir = str(base_dir)) build_dir = str(base_dir / f"TEST_{platform.upper()}" / "build") cmake_args_list = list(args.cmake) if args.cmake else [] - + # Add platform-specific CMake args if platform_specific_cmake_args: cmake_args_list.extend(platform_specific_cmake_args) @@ -218,26 +220,26 @@ def create_config_from_args(args: argparse.Namespace, platform: str, simulator: gen_args_list.append("--plotMemAlloc") config = DeeployTestConfig( - test_name=test_name, - test_dir=test_dir_abs, - platform=platform, - simulator=simulator, - tiling=tiling, - gen_dir=gen_dir, - build_dir=build_dir, - toolchain=args.toolchain, - toolchain_install_dir=args.toolchain_install_dir, - cmake_args=cmake_args_list, - gen_args=gen_args_list, - verbose=args.verbose, - debug=args.debug, + test_name = test_name, + test_dir = test_dir_abs, + platform = platform, + simulator = simulator, + tiling = tiling, + gen_dir = gen_dir, + build_dir = build_dir, + toolchain = args.toolchain, + toolchain_install_dir = args.toolchain_install_dir, + cmake_args = cmake_args_list, + gen_args = gen_args_list, + verbose = args.verbose, + debug = args.debug, ) return config def print_colored_result(result, test_name: str): - + GREEN = '\033[92m' RED = '\033[91m' RESET = '\033[0m' @@ -253,39 +255,39 @@ def print_colored_result(result, test_name: str): def print_configuration(config: DeeployTestConfig): - + CYAN = '\033[96m' BOLD = '\033[1m' RESET = '\033[0m' - + print(f"\n{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}") print(f"{BOLD}{CYAN} Deeploy Test Configuration {RESET}") print(f"{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}\n") - + print(f"{BOLD}Test Configuration:{RESET}") print(f" Test Name : {config.test_name}") print(f" Test Directory : {config.test_dir}") print(f" Generation Directory: {config.gen_dir}") print(f" Build Directory : {config.build_dir}") - + print(f"\n{BOLD}Platform Configuration:{RESET}") print(f" Platform : {config.platform}") print(f" Simulator : {config.simulator}") print(f" Tiling Enabled : {'Yes' if config.tiling else 'No'}") - + print(f"\n{BOLD}Build Configuration:{RESET}") print(f" Toolchain : {config.toolchain}") if config.toolchain_install_dir: print(f" Toolchain Directory : {config.toolchain_install_dir}") if config.cmake_args: print(f" CMake Arguments : {' '.join(config.cmake_args)}") - + print(f"\n{BOLD}Runtime Configuration:{RESET}") print(f" Verbosity Level : {config.verbose}") print(f" Debug Mode : {'Enabled' if config.debug else 'Disabled'}") if config.gen_args: print(f" Generation Arguments: {' '.join(config.gen_args)}") - + print(f"\n{BOLD}{CYAN}═══════════════════════════════════════════════════════════════{RESET}\n") @@ -294,7 +296,7 @@ def main(default_platform: Optional[str] = None, tiling_enabled: bool = False, platform_specific_cmake_args: Optional[list] = None, parsed_args: Optional[argparse.Namespace] = None, - parser_setup_callback=None): + parser_setup_callback = None): """ Main entry point for Deeploy test runners. 
@@ -309,13 +311,13 @@ def main(default_platform: Optional[str] = None, if parsed_args is None: # Make -p optional if default_platform is provided - parser = DeeployRunnerArgumentParser(tiling_arguments=tiling_enabled, - platform_required=(default_platform is None)) - + parser = DeeployRunnerArgumentParser(tiling_arguments = tiling_enabled, + platform_required = (default_platform is None)) + # Allow platform-specific runners to add their own arguments if parser_setup_callback: parser_setup_callback(parser) - + args = parser.parse_args() else: args = parsed_args @@ -343,18 +345,20 @@ def main(default_platform: Optional[str] = None, RED = '\033[91m' BOLD = '\033[1m' RESET = '\033[0m' - print(f"\n{RED}{BOLD}ERROR: Platform mismatch!{RESET}", file=sys.stderr) - print(f"{RED}This runner is designed for the '{default_platform}' platform.{RESET}", file=sys.stderr) - print(f"{RED}You specified platform: '{args.platform}' (normalized to '{normalized_specified}'){RESET}\n", file=sys.stderr) - print(f"Please use one of the following options:", file=sys.stderr) - print(f" 1. Remove the '-p {args.platform}' argument to use the default platform", file=sys.stderr) - print(f" 2. Use the correct platform-specific runner script for '{normalized_specified}'", file=sys.stderr) + print(f"\n{RED}{BOLD}ERROR: Platform mismatch!{RESET}", file = sys.stderr) + print(f"{RED}This runner is designed for the '{default_platform}' platform.{RESET}", file = sys.stderr) + print(f"{RED}You specified platform: '{args.platform}' (normalized to '{normalized_specified}'){RESET}\n", + file = sys.stderr) + print(f"Please use one of the following options:", file = sys.stderr) + print(f" 1. Remove the '-p {args.platform}' argument to use the default platform", file = sys.stderr) + print(f" 2. 
Use the correct platform-specific runner script for '{normalized_specified}'", + file = sys.stderr) sys.exit(1) simulator = args.simulator if args.simulator else default_simulator if platform is None: - print("Error: Platform must be specified with -p or provided as default", file=sys.stderr) + print("Error: Platform must be specified with -p or provided as default", file = sys.stderr) sys.exit(1) if simulator is None: @@ -370,17 +374,17 @@ def main(default_platform: Optional[str] = None, } simulator = simulator_map.get(platform, "host") log.info(f"No simulator specified, using default for {platform}: {simulator}") - + # Extract platform-specific CMake args from parsed args if available if platform_specific_cmake_args is None: platform_specific_cmake_args = [] - + # Check for platform-specific arguments in args object and build CMake args if hasattr(args, 'cores'): platform_specific_cmake_args.append(f"-DNUM_CORES={args.cores}") elif hasattr(args, 'num_cores'): platform_specific_cmake_args.append(f"-DNUM_CORES={args.num_cores}") - + if hasattr(args, 'num_clusters'): platform_specific_cmake_args.append(f"-DNUM_CLUSTERS={args.num_clusters}") @@ -389,7 +393,7 @@ def main(default_platform: Optional[str] = None, print_configuration(config) try: - result = run_complete_test(config, skipgen=args.skipgen, skipsim=args.skipsim) + result = run_complete_test(config, skipgen = args.skipgen, skipsim = args.skipsim) print_colored_result(result, config.test_name) From 9dc8b58e358758aa27c03e8dc5aab8069640775a Mon Sep 17 00:00:00 2001 From: Victor Jung Date: Fri, 16 Jan 2026 12:21:33 +0100 Subject: [PATCH 50/51] Format and lint --- DeeployTest/deeployRunner_chimera.py | 3 ++- DeeployTest/deeployRunner_cortexm.py | 1 + DeeployTest/deeployRunner_generic.py | 1 + DeeployTest/deeployRunner_mempool.py | 3 ++- DeeployTest/deeployRunner_siracusa.py | 3 ++- DeeployTest/deeployRunner_snitch.py | 3 ++- DeeployTest/deeployRunner_softhier.py | 3 ++- DeeployTest/deeployRunner_tiled_siracusa.py | 1 + DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py | 3 ++- DeeployTest/deeployRunner_tiled_snitch.py | 3 ++- DeeployTest/testUtils/core/__init__.py | 2 +- DeeployTest/testUtils/deeployRunner.py | 6 ++++-- DeeployTest/testUtils/pytestRunner.py | 1 - 13 files changed, 22 insertions(+), 11 deletions(-) diff --git a/DeeployTest/deeployRunner_chimera.py b/DeeployTest/deeployRunner_chimera.py index 1908c568a6..3026020338 100644 --- a/DeeployTest/deeployRunner_chimera.py +++ b/DeeployTest/deeployRunner_chimera.py @@ -4,9 +4,10 @@ # SPDX-License-Identifier: Apache-2.0 """Deeploy runner for Chimera platform.""" -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add Chimera-specific arguments diff --git a/DeeployTest/deeployRunner_cortexm.py b/DeeployTest/deeployRunner_cortexm.py index b468abdeca..dddba473e5 100644 --- a/DeeployTest/deeployRunner_cortexm.py +++ b/DeeployTest/deeployRunner_cortexm.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 import sys + from testUtils.deeployRunner import main if __name__ == "__main__": diff --git a/DeeployTest/deeployRunner_generic.py b/DeeployTest/deeployRunner_generic.py index ad006d3bc4..b0757e3a7e 100644 --- a/DeeployTest/deeployRunner_generic.py +++ b/DeeployTest/deeployRunner_generic.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 import sys + from testUtils.deeployRunner import main if __name__ == "__main__": diff --git a/DeeployTest/deeployRunner_mempool.py 
b/DeeployTest/deeployRunner_mempool.py index bbbb505648..400cda92a9 100644 --- a/DeeployTest/deeployRunner_mempool.py +++ b/DeeployTest/deeployRunner_mempool.py @@ -4,9 +4,10 @@ # SPDX-License-Identifier: Apache-2.0 """Deeploy runner for MemPool platform.""" -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add MemPool-specific arguments diff --git a/DeeployTest/deeployRunner_siracusa.py b/DeeployTest/deeployRunner_siracusa.py index 9b36471b76..b754a0c233 100644 --- a/DeeployTest/deeployRunner_siracusa.py +++ b/DeeployTest/deeployRunner_siracusa.py @@ -3,9 +3,10 @@ # # SPDX-License-Identifier: Apache-2.0 -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add Siracusa-specific arguments diff --git a/DeeployTest/deeployRunner_snitch.py b/DeeployTest/deeployRunner_snitch.py index be700591e9..aa97933319 100644 --- a/DeeployTest/deeployRunner_snitch.py +++ b/DeeployTest/deeployRunner_snitch.py @@ -3,9 +3,10 @@ # # SPDX-License-Identifier: Apache-2.0 -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add Snitch-specific arguments diff --git a/DeeployTest/deeployRunner_softhier.py b/DeeployTest/deeployRunner_softhier.py index e11469a2d6..9c9360770e 100644 --- a/DeeployTest/deeployRunner_softhier.py +++ b/DeeployTest/deeployRunner_softhier.py @@ -4,9 +4,10 @@ # SPDX-License-Identifier: Apache-2.0 """Deeploy runner for SoftHier platform.""" -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add SoftHier-specific arguments diff --git a/DeeployTest/deeployRunner_tiled_siracusa.py b/DeeployTest/deeployRunner_tiled_siracusa.py index 01eea59858..2184a4105c 100644 --- a/DeeployTest/deeployRunner_tiled_siracusa.py +++ b/DeeployTest/deeployRunner_tiled_siracusa.py @@ -4,6 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 import sys + from testUtils.deeployRunner import main if __name__ == "__main__": diff --git a/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py index 5624b6607e..6b58844327 100644 --- a/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py +++ b/DeeployTest/deeployRunner_tiled_siracusa_w_neureka.py @@ -3,9 +3,10 @@ # # SPDX-License-Identifier: Apache-2.0 -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add Siracusa+Neureka-specific arguments diff --git a/DeeployTest/deeployRunner_tiled_snitch.py b/DeeployTest/deeployRunner_tiled_snitch.py index 60ee08c7bf..d6e5ffd196 100644 --- a/DeeployTest/deeployRunner_tiled_snitch.py +++ b/DeeployTest/deeployRunner_tiled_snitch.py @@ -3,9 +3,10 @@ # # SPDX-License-Identifier: Apache-2.0 -from testUtils.deeployRunner import main import sys +from testUtils.deeployRunner import main + if __name__ == "__main__": # Define parser setup callback to add Snitch-specific arguments diff --git a/DeeployTest/testUtils/core/__init__.py b/DeeployTest/testUtils/core/__init__.py index 053d678a9e..b08d81d35b 100644 --- a/DeeployTest/testUtils/core/__init__.py +++ b/DeeployTest/testUtils/core/__init__.py @@ -3,7 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 from .config import 
DeeployTestConfig
-from .execution import (build_binary, configure_cmake, generate_network, run_complete_test, run_simulation)
+from .execution import build_binary, configure_cmake, generate_network, run_complete_test, run_simulation
 from .output_parser import TestResult
 from .paths import get_test_paths
 
diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py
index 282e268a3b..7bc52cc9cc 100644
--- a/DeeployTest/testUtils/deeployRunner.py
+++ b/DeeployTest/testUtils/deeployRunner.py
@@ -6,12 +6,14 @@
 import codecs
 import os
 import sys
-import coloredlogs
 from pathlib import Path
 from typing import Optional
 
-from Deeploy.Logging import DEFAULT_FMT, DETAILED_FILE_LOG_FORMAT
+import coloredlogs
+
+from Deeploy.Logging import DEFAULT_FMT
 from Deeploy.Logging import DEFAULT_LOGGER as log
+from Deeploy.Logging import DETAILED_FILE_LOG_FORMAT
 
 from .core import DeeployTestConfig, run_complete_test
 from .core.paths import get_test_paths
diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py
index 36a1b88200..875de8113b 100644
--- a/DeeployTest/testUtils/pytestRunner.py
+++ b/DeeployTest/testUtils/pytestRunner.py
@@ -7,7 +7,6 @@
 from typing import List, Literal, Optional
 
 from .core import DeeployTestConfig, get_test_paths, run_complete_test
-from .core.execution import build_binary, configure_cmake, run_simulation
 
 
 def get_worker_id() -> str:

From f81c378d8d46079156053fbc6408fb0e330171fe Mon Sep 17 00:00:00 2001
From: Victor Jung
Date: Fri, 16 Jan 2026 13:50:58 +0100
Subject: [PATCH 51/51] Fix import and sanitize build folder path

---
 DeeployTest/testUtils/deeployRunner.py |  7 ++++++-
 DeeployTest/testUtils/pytestRunner.py  | 11 ++++++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/DeeployTest/testUtils/deeployRunner.py b/DeeployTest/testUtils/deeployRunner.py
index 7bc52cc9cc..ee3a8c2a20 100644
--- a/DeeployTest/testUtils/deeployRunner.py
+++ b/DeeployTest/testUtils/deeployRunner.py
@@ -183,7 +183,12 @@ def create_config_from_args(args: argparse.Namespace,
     test_dir = args.dir
     gen_dir, test_dir_abs, test_name = get_test_paths(test_dir, platform, base_dir = str(base_dir))
 
-    build_dir = str(base_dir / f"TEST_{platform.upper()}" / "build")
+    # Use a worker-specific build directory to avoid collisions during parallel execution with pytest-xdist
+    worker_id = os.environ.get("PYTEST_XDIST_WORKER", "master")
+    if worker_id == "master":
+        build_dir = str(base_dir / f"TEST_{platform.upper()}" / "build_master")
+    else:
+        build_dir = str(base_dir / f"TEST_{platform.upper()}" / f"build_{worker_id}")
 
     cmake_args_list = list(args.cmake) if args.cmake else []
 
diff --git a/DeeployTest/testUtils/pytestRunner.py b/DeeployTest/testUtils/pytestRunner.py
index 875de8113b..021c897e75 100644
--- a/DeeployTest/testUtils/pytestRunner.py
+++ b/DeeployTest/testUtils/pytestRunner.py
@@ -6,7 +6,16 @@
 from pathlib import Path
 from typing import List, Literal, Optional
 
-from .core import DeeployTestConfig, get_test_paths, run_complete_test
+from .core import DeeployTestConfig, build_binary, configure_cmake, get_test_paths, run_complete_test, run_simulation
+
+__all__ = [
+    'get_worker_id',
+    'create_test_config',
+    'run_and_assert_test',
+    'build_binary',
+    'configure_cmake',
+    'run_simulation',
+]
 
 
 def get_worker_id() -> str:
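
A minimal standalone sketch of how the worker-specific build directory introduced in PATCH 51/51 resolves under pytest-xdist; the helper name worker_build_dir is invented for illustration, while PYTEST_XDIST_WORKER is the environment variable pytest-xdist sets in each worker process ("gw0", "gw1", ...) and leaves unset for single-process runs:

    # Sketch only; mirrors the resulting paths of create_config_from_args above.
    import os
    from pathlib import Path


    def worker_build_dir(base_dir: Path, platform: str) -> str:
        # pytest-xdist workers see PYTEST_XDIST_WORKER="gw0", "gw1", ...;
        # plain single-process pytest leaves it unset, so "master" is used.
        worker_id = os.environ.get("PYTEST_XDIST_WORKER", "master")
        return str(base_dir / f"TEST_{platform.upper()}" / f"build_{worker_id}")


    if __name__ == "__main__":
        # With PYTEST_XDIST_WORKER=gw2 this prints <cwd>/TEST_GENERIC/build_gw2;
        # without it, <cwd>/TEST_GENERIC/build_master.
        print(worker_build_dir(Path.cwd(), "generic"))

Giving each xdist worker its own CMake build tree keeps parallel workers from racing on the same build artifacts, while the "master" fallback keeps single-process runs on one stable path.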