diff --git a/.gitignore b/.gitignore index e66a8b6..251b5d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +# Ignore data files in notebooks folder +notebooks/**/*.json +notebooks/**/*.yaml +notebooks/**/*.parquet +notebooks/**/*.pkl +notebooks/**/*.db + # Ignore vscode settings .vscode/ diff --git a/notebooks/02_orcabridge_basic_usage.ipynb b/notebooks/02_orcabridge_basic_usage.ipynb index 9b6b244..4c1f93a 100644 --- a/notebooks/02_orcabridge_basic_usage.ipynb +++ b/notebooks/02_orcabridge_basic_usage.ipynb @@ -803,9 +803,10 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "import tempfile\n", "import json\n", + "import tempfile\n", + "\n", + "import numpy as np\n", "\n", "\n", "def compute_stats(bin_file: PathLike, output_file=None):\n", diff --git a/notebooks/03_orcabridge_qol_features.ipynb b/notebooks/03_orcabridge_qol_features.ipynb index 38583c5..a28f686 100644 --- a/notebooks/03_orcabridge_qol_features.ipynb +++ b/notebooks/03_orcabridge_qol_features.ipynb @@ -100,11 +100,12 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.pod import function_pod\n", "import json\n", "import tempfile\n", "from pathlib import Path\n", "\n", + "from orcabridge.pod import function_pod\n", + "\n", "json_source = ob.GlobSource(\"json_file\", \"../examples/dataset2\", \"*.json\")\n", "\n", "\n", diff --git a/notebooks/04_orcabridge_tracker.ipynb b/notebooks/04_orcabridge_tracker.ipynb index f1b0b96..24fd052 100644 --- a/notebooks/04_orcabridge_tracker.ipynb +++ b/notebooks/04_orcabridge_tracker.ipynb @@ -23,10 +23,10 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.tracker import Tracker\n", + "from orcabridge.pod import function_pod\n", "from orcabridge.source import GlobSource\n", "from orcabridge.store import DirDataStore\n", - "from orcabridge.pod import function_pod" + "from orcabridge.tracker import Tracker" ] }, { @@ -62,9 +62,10 @@ "metadata": {}, "outputs": [], "source": [ + "import json\n", "import tempfile\n", "from pathlib import Path\n", - "import json\n", + "\n", "import yaml\n", "\n", "# use default data store location of `./pod_data`\n", diff --git a/notebooks/05_orcabridge_dj_integration.ipynb b/notebooks/05_orcabridge_dj_integration.ipynb index f843682..70cbeb8 100644 --- a/notebooks/05_orcabridge_dj_integration.ipynb +++ b/notebooks/05_orcabridge_dj_integration.ipynb @@ -35,14 +35,15 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.pod import function_pod\n", - "from orcabridge.source import GlobSource\n", - "from orcabridge.store import DirDataStore\n", + "import json\n", "import tempfile\n", "from pathlib import Path\n", - "import json\n", + "\n", "import yaml\n", "\n", + "from orcabridge.pod import function_pod\n", + "from orcabridge.source import GlobSource\n", + "from orcabridge.store import DirDataStore\n", "\n", "# define data source\n", "data_source = GlobSource(\n", @@ -162,9 +163,10 @@ } ], "source": [ - "from orcabridge.dj.tracker import QueryTracker\n", "import datajoint as dj\n", "\n", + "from orcabridge.dj.tracker import QueryTracker\n", + "\n", "schema = dj.schema(\"enigma_orcabridge_test\")\n", "\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index c8f1179..fe1e914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,9 +8,13 @@ description = "Function-based Oracapod Pipeline implementation in Python" dynamic = ["version"] dependencies = [ "xxhash", - "networkx", - "typing_extensions", - "matplotlib>=3.10.3", + "networkx", + "typing_extensions", + "matplotlib>=3.10.3", + 
"pandas>=2.2.3", + "pyyaml>=6.0.2", + "pyarrow>=20.0.0", + "polars>=1.30.0", ] readme = "README.md" requires-python = ">=3.10" @@ -36,10 +40,13 @@ version_file = "src/orcabridge/_version.py" [dependency-groups] dev = [ + "deltalake>=1.0.2", "httpie>=3.2.4", "ipykernel>=6.29.5", + "pyiceberg>=0.9.1", "pytest>=8.3.5", "pytest-cov>=6.1.1", "redis>=6.2.0", "ruff>=0.11.11", + "tqdm>=4.67.1", ] diff --git a/src/orcabridge/__init__.py b/src/orcabridge/__init__.py index 675892a..6da00a9 100644 --- a/src/orcabridge/__init__.py +++ b/src/orcabridge/__init__.py @@ -1,15 +1,9 @@ -from . import hashing -from . import pod -from . import mapper -from . import stream -from . import source -from . import store -from .mapper import MapTags, MapPackets, Join, tag, packet +from . import hashing, mappers, pod, sources, store, streams +from .mappers import Join, MapPackets, MapTags, packet, tag from .pod import FunctionPod, function_pod -from .source import GlobSource +from .sources import GlobSource from .store import DirDataStore, SafeDirDataStore -from .tracker import GraphTracker - +from .pipeline import GraphTracker DEFAULT_TRACKER = GraphTracker() DEFAULT_TRACKER.activate() @@ -20,9 +14,9 @@ "store", "pod", "dir_data_store", - "mapper", - "stream", - "source", + "mappers", + "streams", + "sources", "MapTags", "MapPackets", "Join", diff --git a/src/orcabridge/base.py b/src/orcabridge/base.py index f9c57c4..6a73048 100644 --- a/src/orcabridge/base.py +++ b/src/orcabridge/base.py @@ -1,11 +1,14 @@ -from orcabridge.hashing import HashableMixin -from orcabridge.types import Tag, Packet -from typing import Any +# Collection of base classes for operations and streams in the orcabridge framework. import threading -from collections.abc import Collection, Callable, Iterator +from abc import ABC, abstractmethod +from collections.abc import Callable, Collection, Iterator +from typing import Any + +from orcabridge.hashing import HashableMixin +from orcabridge.types import Packet, Tag -class Operation(HashableMixin): +class Operation(ABC, HashableMixin): """ Operation defines a generic operation that can be performed on a stream of data. It is a base class for all operations that can be performed on a collection of streams @@ -79,10 +82,24 @@ def __str__(self): return f"{self.__class__.__name__}({self._label})" return self.__class__.__name__ + def claims_unique_tags( + self, *streams: "SyncStream", trigger_run: bool = True + ) -> bool: + """ + Returns True if the operation claims that it has unique tags, False otherwise. + This method is useful for checking if the operation can be used as a source + for other operations that require unique tags. + Subclasses should override this method if it can provide reasonable check/guarantee + of unique tags. The default implementation returns False, meaning that the operation + does not claim to have unique tags. + """ + return False + + @abstractmethod def forward(self, *streams: "SyncStream") -> "SyncStream": ... -class Tracker: +class Tracker(ABC): """ A tracker is a class that can track the invocations of operations. Only "active" trackers participate in tracking and its `record` method gets called on each invocation of an operation. @@ -124,9 +141,12 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, ext_tb): self.deactivate() + @abstractmethod def record(self, invocation: "Invocation") -> None: ... +# This is NOT an abstract class, but rather a concrete class that +# represents an invocation of an operation on a collection of streams. 
class Invocation(HashableMixin): """ This class represents an invocation of an operation on a collection of streams. @@ -138,6 +158,7 @@ class Invocation(HashableMixin): def __init__( self, operation: Operation, + # TODO: technically this should be Stream to stay consistent with Stream interface streams: Collection["SyncStream"], ) -> None: self.operation = operation @@ -171,8 +192,20 @@ def __lt__(self, other: Any) -> bool: # otherwise, order by the operation return hash(self.operation) < hash(other.operation) + def claims_unique_tags(self, trigger_run: bool = True) -> bool: + """ + Returns True if the invocation claims to have unique tags, False otherwise. + This method is useful for checking if the invocation can be used as a source + for other operations that require unique tags. False is returned if the + uniqueness of tags cannot be determined. + Note that uniqueness is best thought of as a "claim" by the operation + that it has unique tags. The actual uniqueness can only be verified + by iterating over the streams and checking the tags. + """ + return self.operation.claims_unique_tags(*self.streams, trigger_run=trigger_run) + -class Stream(HashableMixin): +class Stream(ABC, HashableMixin): """ A stream is a collection of tagged-packets that are generated by an operation. The stream is iterable and can be used to access the packets in the stream. @@ -242,6 +275,20 @@ def keys(self) -> tuple[Collection[str] | None, Collection[str] | None]: tag, packet = next(iter(self)) return list(tag.keys()), list(packet.keys()) + def claims_unique_tags(self) -> bool: + """ + Returns True if the stream has unique tags, False otherwise. + This method is useful for checking if the stream can be used as a source + for other operations that require unique tags. False is returned if the + uniqueness of tags cannot be determined. + If the stream is generated by an operation, the invocation is consulted for + the information about unique tags. + """ + if self.invocation is not None: + return self.invocation.claims_unique_tags() + return False + + @abstractmethod def __iter__(self) -> Iterator[tuple[Tag, Packet]]: raise NotImplementedError("Subclasses must implement __iter__ method") @@ -260,6 +307,29 @@ class SyncStream(Stream): will have to wait for the stream to finish before proceeding. """ + def claims_unique_tags(self, *, trigger_run=True) -> bool: + """ + For synchronous streams, if the stream is generated by an operation, the invocation + is consulted first to see if the uniqueness of tags can be determined without iterating over the stream. + If uniqueness cannot be determined from the invocation and trigger_run is True, uniqueness is checked + by iterating over all elements and verifying uniqueness. + Consequently, this may trigger upstream computations and can be expensive. + If trigger_run is False, the method falls back to the invocation's claim and does not iterate over the stream. + Since this consults the invocation, the resulting value is ultimately a claim and not a guarantee + of uniqueness. If a guarantee of uniqueness is required, use the has_unique_tags method. + """ + result = super().claims_unique_tags() + if result or not trigger_run: + return result + + # If uniqueness cannot be determined from the invocation, iterate over the stream + # (tags are dicts, so hash their items for the set membership check) + unique_tags = set() + for tag, _ in self: + if frozenset(tag.items()) in unique_tags: + return False + unique_tags.add(frozenset(tag.items())) + return True + def head(self, n: int = 5) -> None: """ Print the first n elements of the stream.
@@ -281,7 +351,7 @@ def __len__(self) -> int: return sum(1 for _ in self) def __rshift__( - self, transformer: Callable[["SyncStream"], "SyncStream"] + self, transformer: dict | Callable[["SyncStream"], "SyncStream"] ) -> "SyncStream": """ Returns a new stream that is the result of applying the mapping to the stream. @@ -289,19 +359,24 @@ def __rshift__( are returned in a new stream. """ # TODO: remove just in time import - from .mapper import MapPackets + from .mappers import MapPackets if isinstance(transformer, dict): return MapPackets(transformer)(self) elif isinstance(transformer, Callable): return transformer(self) + # Otherwise, do not know how to handle the transformer + raise TypeError( + "transformer must be a dictionary or a callable that takes a SyncStream" + ) + def __mul__(self, other: "SyncStream") -> "SyncStream": """ Returns a new stream that is the result joining with the other stream """ # TODO: remove just in time import - from .mapper import Join + from .mappers import Join if not isinstance(other, SyncStream): raise TypeError("other must be a SyncStream") @@ -321,6 +396,9 @@ class Source(Operation, SyncStream): type of Operation that takes no input and produces a stream of packets. For convenience, the source itself is also a stream and thus can be used as an input to other operations directly. + However, note that Source is still best thought of as an Operation that + produces a stream of packets, rather than a stream itself. On almost all occasions, + Source acts as an Operation. """ def __init__(self, label: str | None = None, **kwargs) -> None: diff --git a/src/orcabridge/dj/mapper.py b/src/orcabridge/dj/mapper.py index 79d7a6c..d3f2d69 100644 --- a/src/orcabridge/dj/mapper.py +++ b/src/orcabridge/dj/mapper.py @@ -1,8 +1,9 @@ -from .stream import QueryStream -from .operation import QueryOperation -from ..mapper import Mapper, Join, MapPackets, MapTags -from typing import Optional import warnings +from typing import Optional + +from orcabridge.mappers import Join, MapPackets, Mapper, MapTags +from .operation import QueryOperation +from .stream import QueryStream class QueryMapper(QueryOperation, Mapper): diff --git a/src/orcabridge/dj/operation.py b/src/orcabridge/dj/operation.py index 0259308..d4d5a81 100644 --- a/src/orcabridge/dj/operation.py +++ b/src/orcabridge/dj/operation.py @@ -1,5 +1,5 @@ -from .stream import QueryStream from ..base import Operation +from .stream import QueryStream class QueryOperation(Operation): diff --git a/src/orcabridge/dj/pod.py b/src/orcabridge/dj/pod.py index e278e3c..815b2dc 100644 --- a/src/orcabridge/dj/pod.py +++ b/src/orcabridge/dj/pod.py @@ -1,15 +1,16 @@ -from .stream import QueryStream, TableStream, TableCachedStream -from ..utils.name import pascal_to_snake, snake_to_pascal -from .operation import QueryOperation -from ..pod import Pod, FunctionPod -from .source import QuerySource -from .mapper import JoinQuery +import logging +from typing import Collection, Optional, Tuple + import datajoint as dj from datajoint import Schema -from typing import Collection, Tuple, Optional from datajoint.table import Table -import logging +from ..pod import FunctionPod, Pod +from ..utils.name import pascal_to_snake, snake_to_pascal +from .mapper import JoinQuery +from .operation import QueryOperation +from .source import QuerySource +from .stream import QueryStream, TableCachedStream, TableStream logger = logging.getLogger(__name__) @@ -37,9 +38,7 @@ def __init__( self.fp = fp self.schema = schema self.table_name = ( - table_name - if 
table_name is not None - else pascal_to_snake(fp.function.__name__) + table_name if table_name is not None else pascal_to_snake(fp.function_name) ) + (f"_{table_postfix}" if table_postfix else "") self.streams = streams if streams is not None else [] self.table = None @@ -57,7 +56,7 @@ def identity_structure(self, *streams): @property def label(self) -> str: if self._label is None: - return snake_to_pascal(self.fp.function.__name__) + return snake_to_pascal(self.fp.function_name) return self._label def prepare_source_query(self) -> Tuple[QueryStream, Collection[Table]]: diff --git a/src/orcabridge/dj/source.py b/src/orcabridge/dj/source.py index bde1ed4..cbcf0d7 100644 --- a/src/orcabridge/dj/source.py +++ b/src/orcabridge/dj/source.py @@ -1,15 +1,17 @@ -from ..source import Source -from .stream import QueryStream, TableCachedStream, TableStream -from .operation import QueryOperation -from ..stream import SyncStream -from datajoint import Table -from typing import Any, Collection, Union, Optional -from datajoint import Schema +import logging +from typing import Any, Collection, Optional, Union + import datajoint as dj +from datajoint import Schema, Table + +from orcabridge.hashing import hash_to_uuid + +from orcabridge.sources import Source +from orcabridge.streams import SyncStream from ..utils.name import pascal_to_snake, snake_to_pascal from ..utils.stream_utils import common_elements -import logging -from orcabridge.hashing import hash_to_uuid +from .operation import QueryOperation +from .stream import QueryStream, TableCachedStream, TableStream logger = logging.getLogger(__name__) diff --git a/src/orcabridge/dj/stream.py b/src/orcabridge/dj/stream.py index 1bbefe8..c8677f5 100644 --- a/src/orcabridge/dj/stream.py +++ b/src/orcabridge/dj/stream.py @@ -1,12 +1,11 @@ -from ..stream import SyncStream import copy - +import logging +from typing import Any, Collection, Union from datajoint.expression import QueryExpression from datajoint.table import Table -from typing import Collection, Any, Union -import logging +from orcabridge.streams import SyncStream logger = logging.getLogger(__name__) @@ -57,10 +56,9 @@ def __and__(self, other: Any) -> "QueryStream": """ Restrict the query stream by `other` and return a new query stream """ - from .mapper import RestrictQuery - # lazy load to avoid circular import - from ..source import TableSource + from .source import TableSource + from .mapper import RestrictQuery if isinstance(other, TableSource): other = other.table diff --git a/src/orcabridge/dj/tracker.py b/src/orcabridge/dj/tracker.py index 9be4eac..4e92273 100644 --- a/src/orcabridge/dj/tracker.py +++ b/src/orcabridge/dj/tracker.py @@ -1,20 +1,21 @@ -from orcabridge.tracker import GraphTracker -from datajoint import Schema -from typing import Collection, Tuple, Optional, Any +import sys +from collections import defaultdict from types import ModuleType -import networkx as nx +from typing import Any, Collection, Optional, Tuple +import networkx as nx +from datajoint import Schema from orcabridge.base import Operation, Source -from orcabridge.mapper import Mapper, Merge +from orcabridge.mappers import Mapper, Merge from orcabridge.pod import FunctionPod -from .stream import QueryStream -from .source import TableCachedSource, MergedQuerySource +from orcabridge.pipeline import GraphTracker + +from .mapper import convert_to_query_mapper from .operation import QueryOperation from .pod import TableCachedPod -from .mapper import convert_to_query_mapper -import sys -from collections import defaultdict 
+from .source import MergedQuerySource, TableCachedSource +from .stream import QueryStream def convert_to_query_operation( diff --git a/src/orcabridge/hashing/__init__.py b/src/orcabridge/hashing/__init__.py index d6809c0..2b1c5a4 100644 --- a/src/orcabridge/hashing/__init__.py +++ b/src/orcabridge/hashing/__init__.py @@ -1,19 +1,17 @@ -from .types import FileHasher, StringCacher, ObjectHasher - from .core import ( + HashableMixin, + function_content_hash, + get_function_signature, hash_file, - hash_pathset, + hash_function, hash_packet, + hash_pathset, hash_to_hex, hash_to_int, hash_to_uuid, - HashableMixin, - function_content_hash, - get_function_signature, - hash_function, ) - from .defaults import get_default_composite_hasher +from .types import FileHasher, ObjectHasher, StringCacher __all__ = [ "FileHasher", diff --git a/src/orcabridge/hashing/core.py b/src/orcabridge/hashing/core.py index 33e45b4..dd4f6a5 100644 --- a/src/orcabridge/hashing/core.py +++ b/src/orcabridge/hashing/core.py @@ -6,30 +6,32 @@ suitable for arbitrarily nested data structures and custom objects via HashableMixin. """ -from functools import partial import hashlib +import inspect import json import logging -from uuid import UUID +import zlib +from functools import partial +from os import PathLike +from pathlib import Path from typing import ( Any, - Dict, - Optional, - Union, + Callable, Collection, + Dict, + Literal, Mapping, - TypeVar, + Optional, Set, - Callable, - Literal, + TypeVar, + Union, ) -from pathlib import Path -from os import PathLike +from uuid import UUID + import xxhash -import zlib -from orcabridge.types import PathSet, Packet + +from orcabridge.types import Packet, PathSet from orcabridge.utils.name import find_noncolliding_name -import inspect # Configure logging with __name__ for proper hierarchy logger = logging.getLogger(__name__) @@ -174,7 +176,7 @@ def content_hash(self, char_count: Optional[int] = 16) -> str: if structure is None: logger.warning( f"HashableMixin.content_hash called on {self.__class__.__name__} " - "instance without identity_structure() implementation. " + "instance that returned identity_structure() of None. " "Using class name as default identity, which may not correctly reflect object uniqueness." ) # Fall back to class name for consistent behavior @@ -773,7 +775,10 @@ def hash_file(file_path, algorithm="sha256", buffer_size=65536) -> str: def get_function_signature( - func: Callable, include_defaults: bool = True, include_module: bool = True + func: Callable, + name_override: str | None = None, + include_defaults: bool = True, + include_module: bool = True, ) -> str: """ Get a stable string representation of a function's signature. 
@@ -796,7 +801,7 @@ def get_function_signature( parts.append(f"module:{func.__module__}") # Add function name - parts.append(f"name:{func.__name__}") + parts.append(f"name:{name_override or func.__name__}") # Add parameters param_strs = [] @@ -830,6 +835,7 @@ def _is_in_string(line, pos): def get_function_components( func: Callable, + name_override: str | None = None, include_name: bool = True, include_module: bool = True, include_declaration: bool = True, @@ -860,7 +866,7 @@ def get_function_components( # Add function name if include_name: - components.append(f"name:{func.__name__}") + components.append(f"name:{name_override or func.__name__}") # Add module if include_module and hasattr(func, "__module__"): @@ -913,7 +919,7 @@ def get_function_components( except (IOError, TypeError): # If source can't be retrieved, fall back to signature - components.append(f"name:{func.__name__}") + components.append(f"name:{name_override or func.__name__}") try: sig = inspect.signature(func) components.append(f"signature:{str(sig)}") @@ -982,6 +988,7 @@ def hash_function( function: Callable, function_hash_mode: Literal["content", "signature", "name"] = "content", return_type: Literal["hex", "int", "uuid"] = "hex", + name_override: Optional[str] = None, content_kwargs=None, hash_kwargs=None, ) -> Union[str, int, UUID]: @@ -996,7 +1003,7 @@ def hash_function( extractors: - "content": arguments for get_function_components - "signature": arguments for get_function_signature - - "name": no underlying function used - simply function.__name__ + - "name": no underlying function used - simply function.__name__ or name_override if provided hash_kwargs: Additional arguments for the hashing function that depends on the return type - "hex": arguments for hash_to_hex - "int": arguments for hash_to_int @@ -1016,14 +1023,19 @@ def hash_function( logger.debug( f"Hashing function '{function.__name__}' using mode '{function_hash_mode}'" + + (f" with name override '{name_override}'" if name_override else "") ) if function_hash_mode == "content": - hash_content = "\n".join(get_function_components(function, **content_kwargs)) + hash_content = "\n".join( + get_function_components( + function, name_override=name_override, **content_kwargs + ) + ) elif function_hash_mode == "signature": hash_content = get_function_signature(function, **content_kwargs) elif function_hash_mode == "name": - hash_content = function.__name__ + hash_content = name_override or function.__name__ else: err_msg = f"Unknown function_hash_mode: {function_hash_mode}" logger.error(err_msg) diff --git a/src/orcabridge/hashing/defaults.py b/src/orcabridge/hashing/defaults.py index fe463a5..2f65a7d 100644 --- a/src/orcabridge/hashing/defaults.py +++ b/src/orcabridge/hashing/defaults.py @@ -10,3 +10,9 @@ def get_default_composite_hasher(with_cache=True) -> CompositeHasher: string_cacher = InMemoryCacher(max_size=None) return HasherFactory.create_cached_composite(string_cacher) return HasherFactory.create_basic_composite() + + +def get_default_composite_hasher_with_cacher(cacher=None) -> CompositeHasher: + if cacher is None: + cacher = InMemoryCacher(max_size=None) + return HasherFactory.create_cached_composite(cacher) diff --git a/src/orcabridge/hashing/file_hashers.py b/src/orcabridge/hashing/file_hashers.py index d86e748..bf3365a 100644 --- a/src/orcabridge/hashing/file_hashers.py +++ b/src/orcabridge/hashing/file_hashers.py @@ -1,10 +1,10 @@ -from orcabridge.types import PathLike, PathSet, Packet -from orcabridge.hashing.core import hash_file, hash_pathset, 
hash_packet +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.types import ( FileHasher, PathSetHasher, StringCacher, ) +from orcabridge.types import Packet, PathLike, PathSet # Completely unnecessary to inherit from FileHasher, but this diff --git a/src/orcabridge/hashing/files.py b/src/orcabridge/hashing/files.py index 9f35a5f..3a70b9d 100644 --- a/src/orcabridge/hashing/files.py +++ b/src/orcabridge/hashing/files.py @@ -1,8 +1,9 @@ -from orcabridge.types import PathLike, PathSet, Packet +import threading from typing import Optional -from orcabridge.hashing.core import hash_file, hash_pathset, hash_packet + +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.types import FileHasher, StringCacher -import threading +from orcabridge.types import Packet, PathLike, PathSet # Completely unnecessary to inherit from FileHasher, but this diff --git a/src/orcabridge/hashing_legacy.py b/src/orcabridge/hashing/hashing_legacy.py similarity index 98% rename from src/orcabridge/hashing_legacy.py rename to src/orcabridge/hashing/hashing_legacy.py index 2e325bb..353a4f9 100644 --- a/src/orcabridge/hashing_legacy.py +++ b/src/orcabridge/hashing/hashing_legacy.py @@ -42,11 +42,11 @@ # def hash_function(function, function_hash_mode: str = "content", hasher_kwargs=None) -> str: # """ # Hash a function based on its content, signature, or name. - +# # Args: # function: The function to hash # function_hash_mode: The mode of hashing ('content', 'signature', 'name') -# store_name: Optional name for the store +# function_name: Optional name for the function (if not provided, uses function's __name__) # Returns: # A string representing the hash of the function diff --git a/src/orcabridge/hashing/string_cachers.py b/src/orcabridge/hashing/string_cachers.py index 75fb91e..817aa44 100644 --- a/src/orcabridge/hashing/string_cachers.py +++ b/src/orcabridge/hashing/string_cachers.py @@ -4,7 +4,7 @@ import sqlite3 import threading from pathlib import Path -from typing import Any, TYPE_CHECKING +from typing import TYPE_CHECKING, Any from orcabridge.hashing.types import StringCacher @@ -22,6 +22,60 @@ redis = None +class TransferCacher(StringCacher): + """ + Takes two string cachers as source and destination. Everytime a cached value is retrieved from source, + the value is also set in the destination cacher. + This is useful for transferring cached values between different caching mechanisms. + """ + + def __init__(self, source: StringCacher, destination: StringCacher): + """ + Initialize the TransferCacher. + + Args: + source: The source cacher to read from + destination: The destination cacher to write to + """ + self.source = source + self.destination = destination + + def transfer(self, cache_key: str) -> str | None: + """ + Transfer a cached value from source to destination. 
+ + Args: + cache_key: The key to transfer + + Returns: + The cached value if found, otherwise None + """ + # Try to get the cached value from the source + value = self.source.get_cached(cache_key) + if value is not None: + # Set it in the destination cacher + self.destination.set_cached(cache_key, value) + return value + + def get_cached(self, cache_key: str) -> str | None: + # try to get the cached value from the destination first + value = self.destination.get_cached(cache_key) + if value is not None: + return value + # if not found in destination, get it from source + value = self.source.get_cached(cache_key) + if value is not None: + self.destination.set_cached(cache_key, value) + return value + + def set_cached(self, cache_key: str, value: str) -> None: + # Only set the value in the destination cacher + self.destination.set_cached(cache_key, value) + + def clear_cache(self) -> None: + self.destination.clear_cache() + + class InMemoryCacher(StringCacher): """Thread-safe in-memory LRU cache.""" @@ -628,7 +682,8 @@ def get_cached(self, cache_key: str) -> str | None: result = self.redis.get(self._get_prefixed_key(cache_key)) if result is None: return None - + logger.info(f"Retrieved cached value from Redis for key {cache_key}") + # Decode bytes to string if necessary if isinstance(result, bytes): return result.decode("utf-8") @@ -648,6 +703,8 @@ def set_cached(self, cache_key: str, value: str) -> None: return try: + logger.info(f"Saving cached value to Redis for key {cache_key}") + self.redis.set(self._get_prefixed_key(cache_key), value) except (redis.RedisError, redis.ConnectionError) as e: diff --git a/src/orcabridge/hashing/types.py b/src/orcabridge/hashing/types.py index f0b9ce4..6dda6c0 100644 --- a/src/orcabridge/hashing/types.py +++ b/src/orcabridge/hashing/types.py @@ -1,8 +1,9 @@ """Hash strategy protocols for dependency injection.""" from abc import ABC, abstractmethod -from typing import Protocol, Any, runtime_checkable +from typing import Any, Protocol, runtime_checkable from uuid import UUID + from orcabridge.types import Packet, PathLike, PathSet @@ -19,7 +20,7 @@ def identity_structure(self) -> Any: Should be deterministic and include all identity-relevant data. Return None to indicate no custom identity is available. """ - ... 
+ pass  # pragma: no cover class ObjectHasher(ABC): diff --git a/src/orcabridge/mapper.py b/src/orcabridge/mappers.py similarity index 80% rename from src/orcabridge/mapper.py rename to src/orcabridge/mappers.py index 81cdb60..4ced7ee 100644 --- a/src/orcabridge/mapper.py +++ b/src/orcabridge/mappers.py @@ -1,15 +1,21 @@ -from orcabridge.base import SyncStream, Mapper -from orcabridge.stream import SyncStreamFromGenerator +from collections import defaultdict +from collections.abc import Callable, Collection, Iterator +from itertools import chain +from typing import Any + + +from orcabridge.base import Mapper, SyncStream +from orcabridge.hashing import function_content_hash, hash_function +from orcabridge.streams import SyncStreamFromGenerator from orcabridge.utils.stream_utils import ( - join_tags, - check_packet_compatibility, - batch_tag, batch_packet, + batch_tags, + check_packet_compatibility, + join_tags, ) -from orcabridge.hashing import hash_function, function_content_hash -from .types import Tag, Packet -from itertools import chain -from collections.abc import Collection, Iterator, Callable +from orcabridge.utils.stream_utils import fill_missing + +from .types import Packet, Tag class Repeat(Mapper): @@ -20,6 +26,10 @@ class Repeat(Mapper): def __init__(self, repeat_count: int) -> None: super().__init__() + if not isinstance(repeat_count, int): + raise TypeError("repeat_count must be an integer") + if repeat_count < 0: + raise ValueError("repeat_count must be non-negative") self.repeat_count = repeat_count def identity_structure(self, *streams) -> tuple[str, int, set[SyncStream]]: @@ -54,15 +64,18 @@ def generator() -> Iterator[tuple[Tag, Packet]]: def __repr__(self) -> str: return f"Repeat(count={self.repeat_count})" + def claims_unique_tags( + self, *streams: SyncStream, trigger_run: bool = True + ) -> bool: + if len(streams) != 1: + raise ValueError( + "Repeat operation only supports operating on a single input stream" + ) -def fill_missing(dict, keys, default=None): - """ - Fill the missing keys in the dictionary with the specified default value. - """ - for key in keys: - if key not in dict: - dict[key] = default - return dict + # Repeat's uniqueness claim holds only if (1) the input stream has unique tags and (2) the repeat count is 1 + return self.repeat_count == 1 and streams[0].claims_unique_tags( + trigger_run=trigger_run + ) class Merge(Mapper): @@ -106,6 +119,32 @@ def generator() -> Iterator[tuple[Tag, Packet]]: def __repr__(self) -> str: return "Merge()" + def claims_unique_tags( + self, *streams: SyncStream, trigger_run: bool = True + ) -> bool: + """ + Merge operation can only claim unique tags if all input streams have unique tags AND + the tag keys are not identical across all streams.
+ """ + if len(streams) < 2: + raise ValueError("Merge operation requires at least two streams") + # Check if all streams have unique tags + unique_tags = all( + stream.claims_unique_tags(trigger_run=trigger_run) for stream in streams + ) + if not unique_tags: + return False + # check that all streams' tag keys are not identical + tag_key_pool = set() + for stream in streams: + tag_keys, packet_keys = stream.keys() + # TODO: re-evaluate the implication of having empty tag keys in uniqueness guarantee + if tag_keys is None or set(tag_keys) in tag_key_pool: + return False + tag_key_pool.add(frozenset(tag_keys)) + + return True + class Join(Mapper): def identity_structure(self, *streams): @@ -499,7 +538,7 @@ def __init__( super().__init__() self.batch_size = batch_size if tag_processor is None: - tag_processor = lambda tags: batch_tag(tags) # noqa: E731 + tag_processor = batch_tags # noqa: E731 self.tag_processor = tag_processor self.drop_last = drop_last @@ -552,6 +591,74 @@ def identity_structure(self, *streams): ) + tuple(streams) +class GroupBy(Mapper): + def __init__( + self, + group_keys: Collection[str] | None = None, + reduce_keys: bool = False, + selection_function: Callable[[Collection[tuple[Tag, Packet]]], Collection[bool]] + | None = None, + ) -> None: + super().__init__() + self.group_keys = group_keys + self.reduce_keys = reduce_keys + self.selection_function = selection_function + + def identity_structure(self, *streams: SyncStream) -> Any: + struct = (self.__class__.__name__, self.group_keys, self.reduce_keys) + if self.selection_function is not None: + struct += (hash_function(self.selection_function),) + return struct + tuple(streams) + + def forward(self, *streams: SyncStream) -> SyncStream: + if len(streams) != 1: + raise ValueError("GroupBy operation requires exactly one stream") + + stream = streams[0] + stream_keys, packet_keys = stream.keys() + stream_keys = stream_keys or [] + packet_keys = packet_keys or [] + group_keys = self.group_keys if self.group_keys is not None else stream_keys + + def generator() -> Iterator[tuple[Tag, Packet]]: + # step through all packets in the stream and group them by the specified keys + grouped_packets: dict[tuple, list[tuple[Tag, Packet]]] = defaultdict(list) + for tag, packet in stream: + key = tuple(tag.get(key, None) for key in group_keys) + grouped_packets[key].append((tag, packet)) + + for key, packets in grouped_packets.items(): + if self.selection_function is not None: + # apply the selection function to the grouped packets + selected_packets = self.selection_function(packets) + packets = [ + p for p, selected in zip(packets, selected_packets) if selected + ] + + if not packets: + continue + + # create a new tag that combines the group keys + # if reduce_keys is True, we only keep the group keys as a singular value + new_tag = {} + if self.reduce_keys: + new_tag = {k: key[i] for i, k in enumerate(group_keys)} + remaining_keys = set(stream_keys) - set(group_keys) + else: + remaining_keys = set(stream_keys) | set(group_keys) + # for remaining keys return list of tag values + for k in remaining_keys: + if k not in new_tag: + new_tag[k] = [t.get(k, None) for t, _ in packets] + # combine all packets into a single packet + combined_packet = { + k: [p.get(k, None) for _, p in packets] for k in packet_keys + } + yield new_tag, combined_packet + + return SyncStreamFromGenerator(generator) + + class CacheStream(Mapper): """ A Mapper that caches the packets in the stream, thus avoiding upstream recomputation. 
diff --git a/src/orcabridge/pod.py b/src/orcabridge/pod.py index 094842f..4caf774 100644 --- a/src/orcabridge/pod.py +++ b/src/orcabridge/pod.py @@ -1,32 +1,37 @@ +import functools +import logging +import pickle +import warnings +from abc import abstractmethod +import sys +from collections.abc import Callable, Collection, Iterable, Iterator from typing import ( - Literal, Any, + Literal, ) -from collections.abc import Collection, Iterator -from orcabridge.types import Tag, Packet, PodFunction, PathSet -from orcabridge.hashing import hash_function, get_function_signature + from orcabridge.base import Operation -from orcabridge.stream import SyncStream, SyncStreamFromGenerator -from orcabridge.mapper import Join +from orcabridge.hashing import get_function_signature, hash_function +from orcabridge.mappers import Join from orcabridge.store import DataStore, NoOpDataStore -import functools -import warnings -import logging +from orcabridge.streams import SyncStream, SyncStreamFromGenerator +from orcabridge.types import Packet, PathSet, PodFunction, Tag logger = logging.getLogger(__name__) def function_pod( output_keys: Collection[str] | None = None, - store_name: str | None = None, + function_name: str | None = None, data_store: DataStore | None = None, + store_name: str | None = None, function_hash_mode: Literal["signature", "content", "name", "custom"] = "name", custom_hash: int | None = None, force_computation: bool = False, skip_memoization: bool = False, error_handling: Literal["raise", "ignore", "warn"] = "raise", **kwargs, -): +) -> Callable[..., "FunctionPod"]: """ Decorator that wraps a function in a FunctionPod instance. @@ -39,13 +44,32 @@ FunctionPod instance wrapping the decorated function """ - def decorator(func): - # Create a FunctionPod instance with the function and parameters + def decorator(func) -> FunctionPod: + if func.__name__ == "<lambda>": + raise ValueError("Lambda functions cannot be used with function_pod") + + if not hasattr(func, "__module__") or func.__module__ is None: + raise ValueError( + f"Function {func.__name__} must be defined at module level" + ) + + # Store the original function in the module for pickling purposes + # and make sure to change the name of the function + module = sys.modules[func.__module__] + base_function_name = func.__name__ + new_function_name = f"_original_{func.__name__}" + setattr(module, new_function_name, func) + # rename the function to be consistent and make it pickleable + setattr(func, "__name__", new_function_name) + setattr(func, "__qualname__", new_function_name) + + # Create the FunctionPod pod = FunctionPod( function=func, output_keys=output_keys, - store_name=store_name, + function_name=function_name or base_function_name, data_store=data_store, + store_name=store_name, function_hash_mode=function_hash_mode, custom_hash=custom_hash, force_computation=force_computation, @@ -54,9 +78,6 @@ def decorator(func): **kwargs, ) - # Update the metadata to make the pod look more like the original function - functools.update_wrapper(pod, func) - return pod return decorator @@ -64,7 +85,7 @@ def decorator(func): class Pod(Operation): """ - A base class for all pods. A pod can be seen as a special type of operation that + An (abstract) base class for all pods. A pod can be seen as a special type of operation that only operates on the packet content without reading tags. Consequently, no operation of Pod can dependent on the tags of the packets.
This is a design choice to ensure that the pods act as pure functions which is a necessary condition to guarantee reproducibility. @@ -89,26 +110,30 @@ def __call__(self, *streams: SyncStream, **kwargs) -> SyncStream: stream = self.process_stream(*streams) return super().__call__(*stream, **kwargs) - def forward(self, *streams: SyncStream) -> SyncStream: ... - - def process(self, packet: Packet) -> Packet: ... - # TODO: reimplement the memoization as dependency injection class FunctionPod(Pod): + """ + A pod that wraps a function and allows it to be used as an operation in a stream. + This pod can be used to apply a function to the packets in a stream, with optional memoization + and caching of results. It can also handle multiple output keys and error handling. + The function should accept keyword arguments that correspond to the keys in the packets. + The output of the function should be a path or a collection of paths that correspond to the output keys.""" + def __init__( self, function: PodFunction, output_keys: Collection[str] | None = None, - store_name=None, + function_name=None, data_store: DataStore | None = None, + store_name: str | None = None, function_hash_mode: Literal["signature", "content", "name", "custom"] = "name", custom_hash: int | None = None, label: str | None = None, force_computation: bool = False, - skip_cache_lookup: bool = False, + skip_memoization_lookup: bool = False, skip_memoization: bool = False, error_handling: Literal["raise", "ignore", "warn"] = "raise", _hash_function_kwargs: dict | None = None, @@ -116,23 +141,22 @@ def __init__( ) -> None: super().__init__(label=label, **kwargs) self.function = function - if output_keys is None: - output_keys = [] - self.output_keys = output_keys - if store_name is None: + self.output_keys = output_keys or [] + if function_name is None: if hasattr(self.function, "__name__"): - store_name = getattr(self.function, "__name__") + function_name = getattr(self.function, "__name__") else: raise ValueError( - "store_name must be provided if function has no __name__ attribute" + "function_name must be provided if function has no __name__ attribute" ) - self.store_name = store_name + self.function_name = function_name self.data_store = data_store if data_store is not None else NoOpDataStore() + self.store_name = store_name or function_name self.function_hash_mode = function_hash_mode self.custom_hash = custom_hash self.force_computation = force_computation - self.skip_cache_lookup = skip_cache_lookup + self.skip_memoization_lookup = skip_memoization_lookup self.skip_memoization = skip_memoization self.error_handling = error_handling self._hash_function_kwargs = _hash_function_kwargs @@ -148,6 +172,36 @@ def keys( tag_keys, _ = stream[0].keys() return tag_keys, tuple(self.output_keys) + def is_memoized(self, packet: Packet) -> bool: + return self.retrieve_memoized(packet) is not None + + def retrieve_memoized(self, packet: Packet) -> Packet | None: + """ + Retrieve a memoized packet from the data store. + Returns None if no memoized packet is found. + """ + return self.data_store.retrieve_memoized( + self.store_name, + self.content_hash(char_count=16), + packet, + ) + + def memoize( + self, + packet: Packet, + output_packet: Packet, + ) -> Packet: + """ + Memoize the output packet in the data store. + Returns the memoized packet. 
+ """ + return self.data_store.memoize( + self.store_name, + self.content_hash(char_count=16), # identity of this function pod + packet, + output_packet, + ) + def forward(self, *streams: SyncStream) -> SyncStream: # if multiple streams are provided, join them if len(streams) > 1: @@ -161,12 +215,8 @@ def generator() -> Iterator[tuple[Tag, Packet]]: for tag, packet in stream: output_values: list["PathSet"] = [] try: - if not self.skip_cache_lookup: - memoized_packet = self.data_store.retrieve_memoized( - self.store_name, - self.content_hash(char_count=16), - packet, - ) + if not self.skip_memoization_lookup: + memoized_packet = self.retrieve_memoized(packet) else: memoized_packet = None if not self.force_computation and memoized_packet is not None: @@ -176,14 +226,10 @@ def generator() -> Iterator[tuple[Tag, Packet]]: values = self.function(**packet) if len(self.output_keys) == 0: - output_values: list["PathSet"] = [] - elif ( - len(self.output_keys) == 1 - and values is not None - and not isinstance(values, Collection) - ): - output_values = [values] - elif isinstance(values, Collection): + output_values = [] + elif len(self.output_keys) == 1: + output_values = [values] # type: ignore + elif isinstance(values, Iterable): output_values = list(values) # type: ignore elif len(self.output_keys) > 1: raise ValueError( @@ -192,7 +238,7 @@ def generator() -> Iterator[tuple[Tag, Packet]]: if len(output_values) != len(self.output_keys): raise ValueError( - "Number of output keys does not match number of values returned by function" + f"Number of output keys {len(self.output_keys)}:{self.output_keys} does not match number of values returned by function {len(output_values)}" ) except Exception as e: logger.error(f"Error processing packet {packet}: {e}") @@ -211,12 +257,7 @@ def generator() -> Iterator[tuple[Tag, Packet]]: if not self.skip_memoization: # output packet may be modified by the memoization process # e.g. 
if the output is a file, the path may be changed - output_packet = self.data_store.memoize( - self.store_name, - self.content_hash(), # identity of this function pod - packet, - output_packet, - ) + output_packet = self.memoize(packet, output_packet) # type: ignore n_computed += 1 logger.info(f"Computed item {n_computed}") @@ -235,18 +276,21 @@ def identity_structure(self, *streams) -> Any: } function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="content", content_kwargs=content_kwargs, ) elif self.function_hash_mode == "signature": function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="signature", content_kwargs=content_kwargs, ) elif self.function_hash_mode == "name": function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="name", content_kwargs=content_kwargs, ) diff --git a/src/orcabridge/source.py b/src/orcabridge/sources.py similarity index 80% rename from src/orcabridge/source.py rename to src/orcabridge/sources.py index d3273f0..71758bd 100644 --- a/src/orcabridge/source.py +++ b/src/orcabridge/sources.py @@ -1,15 +1,12 @@ -from orcabridge.types import Tag, Packet -from orcabridge.hashing import hash_function -from orcabridge.base import Source -from orcabridge.stream import SyncStream, SyncStreamFromGenerator -from typing import Any, Literal +from collections.abc import Callable, Collection, Iterator from os import PathLike from pathlib import Path -from collections.abc import Collection, Iterator, Callable - +from typing import Any, Literal -class LoadFromSource(Source): - pass +from orcabridge.base import Source +from orcabridge.hashing import hash_function +from orcabridge.streams import SyncStream, SyncStreamFromGenerator +from orcabridge.types import Packet, Tag class GlobSource(Source): @@ -43,31 +40,32 @@ class GlobSource(Source): ... lambda f: {'date': Path(f).stem[:8]}) """ - default_tag_function = lambda f: {"file_name": Path(f).stem} # noqa: E731 + @staticmethod + def default_tag_function(f: PathLike) -> Tag: + return {"file_name": Path(f).stem} # noqa: E731 def __init__( self, name: str, file_path: PathLike, pattern: str = "*", + absolute_path: bool = False, label: str | None = None, - tag_function: str | Callable[[PathLike], Tag] | None = None, + tag_function: Callable[[PathLike], Tag] | None = None, tag_function_hash_mode: Literal["content", "signature", "name"] = "name", expected_tag_keys: Collection[str] | None = None, **kwargs, ) -> None: super().__init__(label=label, **kwargs) self.name = name + file_path = Path(file_path) + if absolute_path: + file_path = file_path.resolve() self.file_path = file_path self.pattern = pattern self.expected_tag_keys = expected_tag_keys - if self.expected_tag_keys is None and isinstance(tag_function, str): - self.expected_tag_keys = [tag_function] if tag_function is None: tag_function = self.__class__.default_tag_function - elif isinstance(tag_function, str): - tag_key = tag_function - tag_function = lambda f: {tag_key: Path(f).stem} # noqa: E731 self.tag_function: Callable[[PathLike], Tag] = tag_function self.tag_function_hash_mode = tag_function_hash_mode @@ -127,3 +125,17 @@ def identity_structure(self, *streams) -> Any: self.pattern, tag_function_hash, ) + tuple(streams) + + def claims_unique_tags( + self, *streams: "SyncStream", trigger_run: bool = True + ) -> bool: + if len(streams) != 0: + raise ValueError( + "GlobSource does not support forwarding streams. 
" + "It generates its own stream from the file system." + ) + # Claim uniqueness only if the default tag function is used + if self.tag_function == self.__class__.default_tag_function: + return True + # Otherwise, delegate to the base class + return super().claims_unique_tags(trigger_run=trigger_run) diff --git a/src/orcabridge/store/__init__.py b/src/orcabridge/store/__init__.py index 9c84ab5..66a68df 100644 --- a/src/orcabridge/store/__init__.py +++ b/src/orcabridge/store/__init__.py @@ -1,4 +1,4 @@ -from .dir_data_store import DirDataStore, NoOpDataStore, DataStore +from .core import DataStore, DirDataStore, NoOpDataStore from .safe_dir_data_store import SafeDirDataStore __all__ = [ diff --git a/src/orcabridge/store/dir_data_store.py b/src/orcabridge/store/core.py similarity index 85% rename from src/orcabridge/store/dir_data_store.py rename to src/orcabridge/store/core.py index 46966da..89fe85e 100644 --- a/src/orcabridge/store/dir_data_store.py +++ b/src/orcabridge/store/core.py @@ -1,41 +1,35 @@ -from orcabridge.types import Packet -from typing import Optional +import json +import logging +import shutil +from os import PathLike from pathlib import Path + from orcabridge.hashing import hash_packet from orcabridge.hashing.defaults import get_default_composite_hasher from orcabridge.hashing.types import PacketHasher -import shutil -import logging -import json -from os import PathLike +from orcabridge.store.types import DataStore +from orcabridge.types import Packet logger = logging.getLogger(__name__) -class DataStore: - def memoize( - self, - store_name: str, - content_hash: str, - packet: Packet, - output_packet: Packet, - ) -> Packet: ... - - def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet - ) -> Optional[Packet]: ... - - class NoOpDataStore(DataStore): """ An empty data store that does not store anything. This is useful for testing purposes or when no memoization is needed. """ + def __init__(self): + """ + Initialize the NoOpDataStore. + This does not require any parameters. 
+ """ + pass + def memoize( self, - store_name: str, - content_hash: str, + function_name: str, + function_hash: str, packet: Packet, output_packet: Packet, overwrite: bool = False, @@ -43,8 +37,8 @@ def memoize( return output_packet def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet - ) -> Optional[Packet]: + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: return None @@ -67,7 +61,7 @@ def __init__( self.preserve_filename = preserve_filename self.overwrite = overwrite self.supplement_source = supplement_source - if packet_hasher is None: + if packet_hasher is None and not legacy_mode: packet_hasher = get_default_composite_hasher(with_cache=True) self.packet_hasher = packet_hasher self.legacy_mode = legacy_mode @@ -75,8 +69,8 @@ def __init__( def memoize( self, - store_name: str, - content_hash: str, + function_name: str, + function_hash: str, packet: Packet, output_packet: Packet, ) -> Packet: @@ -84,7 +78,7 @@ def memoize( packet_hash = hash_packet(packet, algorithm=self.legacy_algorithm) else: packet_hash = self.packet_hasher.hash_packet(packet) - output_dir = self.store_dir / store_name / content_hash / str(packet_hash) + output_dir = self.store_dir / function_name / function_hash / str(packet_hash) info_path = output_dir / "_info.json" source_path = output_dir / "_source.json" @@ -138,20 +132,20 @@ def memoize( # retrieve back the memoized packet and return # TODO: consider if we want to return the original packet or the memoized one retrieved_output_packet = self.retrieve_memoized( - store_name, content_hash, packet + function_name, function_hash, packet ) if retrieved_output_packet is None: raise ValueError(f"Memoized packet {packet} not found after storing it") return retrieved_output_packet def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet + self, function_name: str, function_hash: str, packet: Packet ) -> Packet | None: if self.legacy_mode: packet_hash = hash_packet(packet, algorithm=self.legacy_algorithm) else: packet_hash = self.packet_hasher.hash_packet(packet) - output_dir = self.store_dir / store_name / content_hash / str(packet_hash) + output_dir = self.store_dir / function_name / function_hash / str(packet_hash) info_path = output_dir / "_info.json" source_path = output_dir / "_source.json" @@ -183,11 +177,11 @@ def retrieve_memoized( logger.info(f"No memoized output found for packet {packet}") return None - def clear_store(self, store_name: str) -> None: + def clear_store(self, function_name: str) -> None: # delete the folder self.data_dir and its content - shutil.rmtree(self.store_dir / store_name) + shutil.rmtree(self.store_dir / function_name) - def clear_all_stores(self, interactive=True, store_name="", force=False) -> None: + def clear_all_stores(self, interactive=True, function_name="", force=False) -> None: """ Clear all stores in the data directory. This is a dangerous operation -- please double- and triple-check before proceeding! @@ -197,10 +191,10 @@ def clear_all_stores(self, interactive=True, store_name="", force=False) -> None If False, it will delete only if `force=True`. The user will be prompted to type in the full name of the storage (as shown in the prompt) to confirm deletion. - store_name (str): The name of the store to delete. If not using interactive mode, + function_name (str): The name of the function to delete. If not using interactive mode, this must be set to the store_dir path in order to proceed with the deletion. 
force (bool): If True, delete the store without prompting the user for confirmation. - If False and interactive is False, the `store_name` must match the store_dir + If False and interactive is False, the `function_name` must match the store_dir for the deletion to proceed. """ # delete the folder self.data_dir and its content @@ -212,14 +206,14 @@ def clear_all_stores(self, interactive=True, store_name="", force=False) -> None if confirm.lower() != "y": logger.info("Aborting deletion of all stores") return - store_name = input( - f"Type in the store name {self.store_dir} to confirm the deletion: " + function_name = input( + f"Type in the function name {self.store_dir} to confirm the deletion: " ) - if store_name != str(self.store_dir): + if function_name != str(self.store_dir): logger.info("Aborting deletion of all stores") return - if not force and store_name != str(self.store_dir): + if not force and function_name != str(self.store_dir): logger.info(f"Aborting deletion of all stores in {self.store_dir}") return diff --git a/src/orcabridge/store/file_ops.py b/src/orcabridge/store/file_ops.py index 13c98a6..33675a0 100644 --- a/src/orcabridge/store/file_ops.py +++ b/src/orcabridge/store/file_ops.py @@ -1,8 +1,9 @@ # file_ops.py - Atomic file operations module -import os import logging +import os from pathlib import Path + from orcabridge.types import PathLike logger = logging.getLogger(__name__) diff --git a/src/orcabridge/store/safe_dir_data_store.py b/src/orcabridge/store/safe_dir_data_store.py index 03d45b2..548039f 100644 --- a/src/orcabridge/store/safe_dir_data_store.py +++ b/src/orcabridge/store/safe_dir_data_store.py @@ -1,16 +1,16 @@ # safedirstore.py - SafeDirDataStore implementation -import os -import time +import errno +import fcntl import json import logging -import fcntl -import errno -from pathlib import Path +import os +import time from contextlib import contextmanager +from pathlib import Path from typing import Optional, Union -from .file_ops import atomic_write, atomic_copy +from .file_ops import atomic_copy, atomic_write logger = logging.getLogger(__name__) @@ -203,16 +203,16 @@ def __init__( # Create the data directory if it doesn't exist self.store_dir.mkdir(parents=True, exist_ok=True) - def _get_output_dir(self, store_name, content_hash, packet): + def _get_output_dir(self, function_name, content_hash, packet): """Get the output directory for a specific packet""" from orcabridge.hashing.core import hash_dict packet_hash = hash_dict(packet) - return self.store_dir / store_name / content_hash / str(packet_hash) + return self.store_dir / function_name / content_hash / str(packet_hash) def memoize( self, - store_name: str, + function_name: str, content_hash: str, packet: dict, output_packet: dict, @@ -222,7 +222,7 @@ def memoize( Uses file locking to ensure thread safety and process safety. 
Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet output_packet: Output packet to memoize @@ -234,7 +234,7 @@ def memoize( FileLockError: If the lock cannot be acquired ValueError: If the entry already exists and overwrite is False """ - output_dir = self._get_output_dir(store_name, content_hash, packet) + output_dir = self._get_output_dir(function_name, content_hash, packet) info_path = output_dir / "_info.json" lock_path = output_dir / "_lock" completion_marker = output_dir / "_complete" @@ -247,7 +247,7 @@ def memoize( with file_lock(lock_path, shared=True, timeout=self.lock_timeout): if completion_marker.exists() and not self.overwrite: logger.info(f"Entry already exists for packet {packet}") - return self.retrieve_memoized(store_name, content_hash, packet) + return self.retrieve_memoized(function_name, content_hash, packet) except FileLockError: logger.warning("Could not acquire shared lock to check completion status") # Continue to try with exclusive lock @@ -264,7 +264,7 @@ def memoize( logger.info( f"Entry already exists for packet {packet} (verified with exclusive lock)" ) - return self.retrieve_memoized(store_name, content_hash, packet) + return self.retrieve_memoized(function_name, content_hash, packet) # Check for partial results and clean up if necessary partial_marker = output_dir / "_partial" @@ -320,7 +320,7 @@ def memoize( # Retrieve the memoized packet to ensure consistency # We don't need to acquire a new lock since we already have an exclusive lock return self._retrieve_without_lock( - store_name, content_hash, packet, output_dir + function_name, content_hash, packet, output_dir ) finally: @@ -329,7 +329,7 @@ def memoize( partial_marker.unlink(missing_ok=True) def retrieve_memoized( - self, store_name: str, content_hash: str, packet: dict + self, function_name: str, content_hash: str, packet: dict ) -> Optional[dict]: """ Retrieve a memoized output packet. @@ -337,7 +337,7 @@ def retrieve_memoized( Uses a shared lock to allow concurrent reads while preventing writes during reads. Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet @@ -345,21 +345,21 @@ def retrieve_memoized( The memoized output packet with paths adjusted to absolute paths, or None if the packet is not found """ - output_dir = self._get_output_dir(store_name, content_hash, packet) + output_dir = self._get_output_dir(function_name, content_hash, packet) lock_path = output_dir / "_lock" # Use a shared lock for reading to allow concurrent reads try: with file_lock(lock_path, shared=True, timeout=self.lock_timeout): return self._retrieve_without_lock( - store_name, content_hash, packet, output_dir + function_name, content_hash, packet, output_dir ) except FileLockError: logger.warning(f"Could not acquire shared lock to read {output_dir}") return None def _retrieve_without_lock( - self, store_name: str, content_hash: str, packet: dict, output_dir: Path + self, function_name: str, content_hash: str, packet: dict, output_dir: Path ) -> Optional[dict]: """ Helper to retrieve a memoized packet without acquiring a lock. @@ -367,7 +367,7 @@ def _retrieve_without_lock( This is used internally when we already have a lock. 
Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet output_dir: Directory containing the output @@ -412,16 +412,16 @@ def _retrieve_without_lock( logger.error(f"Error loading memoized output for packet {packet}: {e}") return None - def clear_store(self, store_name: str) -> None: + def clear_store(self, function_name: str) -> None: """ Clear a specific store. Args: - store_name: Name of the store to clear + function_name: Name of the function to clear """ import shutil - store_path = self.store_dir / store_name + store_path = self.store_dir / function_name if store_path.exists(): shutil.rmtree(store_path) @@ -433,24 +433,24 @@ def clear_all_stores(self) -> None: shutil.rmtree(self.store_dir) self.store_dir.mkdir(parents=True, exist_ok=True) - def clean_stale_data(self, store_name=None, max_age=86400): + def clean_stale_data(self, function_name=None, max_age=86400): """ Clean up stale data in the store. Args: - store_name: Optional name of the store to clean, or None for all stores + function_name: Optional name of the function to clean, or None for all functions max_age: Maximum age of data in seconds before it's considered stale """ import shutil - if store_name is None: + if function_name is None: # Clean all stores for store_dir in self.store_dir.iterdir(): if store_dir.is_dir(): self.clean_stale_data(store_dir.name, max_age) return - store_path = self.store_dir / store_name + store_path = self.store_dir / function_name if not store_path.is_dir(): return diff --git a/src/orcabridge/store/transfer.py b/src/orcabridge/store/transfer.py new file mode 100644 index 0000000..c4757ef --- /dev/null +++ b/src/orcabridge/store/transfer.py @@ -0,0 +1,68 @@ +# Implements transfer data store that lets you transfer memoized packets between data stores. + +from orcabridge.store.types import DataStore +from orcabridge.types import Packet + + +class TransferDataStore(DataStore): + """ + A data store that allows transferring memoized packets between different data stores. + This is useful for moving data between different storage backends. + """ + + def __init__(self, source_store: DataStore, target_store: DataStore) -> None: + self.source_store = source_store + self.target_store = target_store + + def transfer(self, function_name: str, content_hash: str, packet: Packet) -> Packet: + """ + Transfer a memoized packet from the source store to the target store. + """ + retrieved_packet = self.source_store.retrieve_memoized( + function_name, content_hash, packet + ) + if retrieved_packet is None: + raise ValueError("Packet not found in source store.") + + return self.target_store.memoize( + function_name, content_hash, packet, retrieved_packet + ) + + def retrieve_memoized( + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: + """ + Retrieve a memoized packet from the target store. 
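
Reviewer sketch (illustrative, not part of the patch): the intended use of TransferDataStore appears to be migrating memoized results between backends, with reads falling back to the source store and lazily copying hits into the target. Under that assumption, usage could look like this; the store locations, function name, and hash are invented.

from orcabridge.store import DirDataStore
from orcabridge.store.transfer import TransferDataStore

old_store = DirDataStore(store_dir="./pod_data_old")
new_store = DirDataStore(store_dir="./pod_data_new")
bridge = TransferDataStore(source_store=old_store, target_store=new_store)

packet = {"txt_file": "data/day1.txt"}

# Read-through: checks the target first, then the source, copying any source hit
# into the target as a side effect.
result = bridge.retrieve_memoized("count_lines", "abc123", packet)

# Eager push of a single entry from source to target; raises ValueError if the
# entry is missing from the source store.
bridge.transfer("count_lines", "abc123", packet)
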
+ """ + # Try retrieving from the target store first + memoized_packet = self.target_store.retrieve_memoized( + function_name, function_hash, packet + ) + if memoized_packet is not None: + return memoized_packet + + # If not found, try retrieving from the source store + memoized_packet = self.source_store.retrieve_memoized( + function_name, function_hash, packet + ) + if memoized_packet is not None: + # Memoize the packet in the target store as part of the transfer + self.target_store.memoize( + function_name, function_hash, packet, memoized_packet + ) + + return memoized_packet + + def memoize( + self, + function_name: str, + function_hash: str, + packet: Packet, + output_packet: Packet, + ) -> Packet: + """ + Memoize a packet in the target store. + """ + return self.target_store.memoize( + function_name, function_hash, packet, output_packet + ) diff --git a/src/orcabridge/store/types.py b/src/orcabridge/store/types.py new file mode 100644 index 0000000..b32aede --- /dev/null +++ b/src/orcabridge/store/types.py @@ -0,0 +1,24 @@ +from typing import Protocol, runtime_checkable + +from orcabridge.types import Packet + + +@runtime_checkable +class DataStore(Protocol): + """ + Protocol for data stores that can memoize and retrieve packets. + This is used to define the interface for data stores like DirDataStore. + """ + + def __init__(self, *args, **kwargs) -> None: ... + def memoize( + self, + function_name: str, + function_hash: str, + packet: Packet, + output_packet: Packet, + ) -> Packet: ... + + def retrieve_memoized( + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: ... diff --git a/src/orcabridge/stream.py b/src/orcabridge/streams.py similarity index 92% rename from src/orcabridge/stream.py rename to src/orcabridge/streams.py index b2f6c13..03100c7 100644 --- a/src/orcabridge/stream.py +++ b/src/orcabridge/streams.py @@ -1,6 +1,7 @@ -from orcabridge.types import Tag, Packet +from collections.abc import Callable, Collection, Iterator + from orcabridge.base import SyncStream -from collections.abc import Collection, Iterator, Callable +from orcabridge.types import Packet, Tag class SyncStreamFromLists(SyncStream): @@ -11,13 +12,14 @@ def __init__( paired: Collection[tuple[Tag, Packet]] | None = None, tag_keys: list[str] | None = None, packet_keys: list[str] | None = None, + strict: bool = True, **kwargs, ) -> None: super().__init__(**kwargs) self.tag_keys = tag_keys self.packet_keys = packet_keys if tags is not None and packets is not None: - if len(tags) != len(packets): + if strict and len(tags) != len(packets): raise ValueError( "tags and packets must have the same length if both are provided" ) diff --git a/src/orcabridge/tracker.py b/src/orcabridge/tracker.py index 76c7bf7..e8224a2 100644 --- a/src/orcabridge/tracker.py +++ b/src/orcabridge/tracker.py @@ -1,5 +1,5 @@ +from orcabridge.base import Invocation, Operation, Tracker import networkx as nx -from orcabridge.base import Operation, Invocation, Tracker import matplotlib.pyplot as plt diff --git a/src/orcabridge/types.py b/src/orcabridge/types.py index 626023c..51a0284 100644 --- a/src/orcabridge/types.py +++ b/src/orcabridge/types.py @@ -1,13 +1,15 @@ -from typing import Protocol +import os from collections.abc import Collection, Mapping +from typing import Protocol + from typing_extensions import TypeAlias -import os # Convenience alias for anything pathlike PathLike = str | os.PathLike # an (optional) string or a collection of (optional) string values -TagValue: TypeAlias = str | None | 
Collection[str | None] +# Note that TagValue can be nested, allowing for an arbitrary depth of nested lists +TagValue: TypeAlias = str | None | Collection["TagValue"] # the top level tag is a mapping from string keys to values that can be a string or diff --git a/src/orcabridge/utils/stream_utils.py b/src/orcabridge/utils/stream_utils.py index 9edc92c..611e94e 100644 --- a/src/orcabridge/utils/stream_utils.py +++ b/src/orcabridge/utils/stream_utils.py @@ -2,9 +2,10 @@ Utility functions for handling tags """ -from typing import TypeVar from collections.abc import Collection, Mapping -from orcabridge.types import Tag, Packet +from typing import TypeVar + +from orcabridge.types import Packet, Tag K = TypeVar("K") V = TypeVar("V") @@ -50,7 +51,7 @@ def check_packet_compatibility(packet1: Packet, packet2: Packet) -> bool: return True -def batch_tag(all_tags: Collection[Tag]) -> Tag: +def batch_tags(all_tags: Collection[Tag]) -> Tag: """ Batches the tags together. Grouping values under the same key into a list. """ @@ -86,3 +87,13 @@ def batch_packet( raise KeyError(f"Packet {p} does not have key {k}") batch_packet[k].append(p[k]) return batch_packet + + +def fill_missing(dict, keys, default=None): + """ + Fill the missing keys in the dictionary with the specified default value. + """ + for key in keys: + if key not in dict: + dict[key] = default + return dict diff --git a/tests/test_hashing/generate_file_hashes.py b/tests/test_hashing/generate_file_hashes.py index 57a5e4d..a2fe385 100644 --- a/tests/test_hashing/generate_file_hashes.py +++ b/tests/test_hashing/generate_file_hashes.py @@ -11,8 +11,8 @@ import random import string import sys -from pathlib import Path from datetime import datetime +from pathlib import Path # Add the parent directory to the path to import orcabridge sys.path.append(str(Path(__file__).parent.parent.parent)) diff --git a/tests/test_hashing/generate_hash_examples.py b/tests/test_hashing/generate_hash_examples.py index bd266c1..cbba97b 100644 --- a/tests/test_hashing/generate_hash_examples.py +++ b/tests/test_hashing/generate_hash_examples.py @@ -4,9 +4,10 @@ # and revision of the codebase. 
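
Note (illustrative, assumptions flagged inline): with the recursive TagValue alias above, arbitrarily nested tag values become legal, and the new batch_tags and fill_missing helpers operate on plain dicts. A quick sketch of what that permits, with made-up values:

from orcabridge.utils.stream_utils import batch_tags, fill_missing

nested_tag = {"session": ["day1", ["rep1", None]]}        # nested TagValue is now valid

# batch_tags groups values for the same key into a list, roughly:
batched = batch_tags([{"day": "d1"}, {"day": "d2"}])      # -> {"day": ["d1", "d2"]}

# fill_missing adds absent keys with the given default (None here):
filled = fill_missing({"day": "d1"}, ["day", "session"])  # -> {"day": "d1", "session": None}
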
import json -from pathlib import Path from collections import OrderedDict from datetime import datetime +from pathlib import Path + from orcabridge.hashing import hash_to_hex, hash_to_int, hash_to_uuid # Create the hash_samples directory if it doesn't exist diff --git a/tests/test_hashing/generate_pathset_packet_hashes.py b/tests/test_hashing/generate_pathset_packet_hashes.py index 376fb60..6314e66 100644 --- a/tests/test_hashing/generate_pathset_packet_hashes.py +++ b/tests/test_hashing/generate_pathset_packet_hashes.py @@ -13,7 +13,7 @@ # Add the parent directory to the path to import orcabridge sys.path.append(str(Path(__file__).parent.parent.parent)) -from orcabridge.hashing import hash_pathset, hash_packet +from orcabridge.hashing import hash_packet, hash_pathset # Create directories if they don't exist HASH_SAMPLES_DIR = Path(__file__).parent / "hash_samples" diff --git a/tests/test_hashing/test_basic_composite_hasher.py b/tests/test_hashing/test_basic_composite_hasher.py index d8fcc58..798f79d 100644 --- a/tests/test_hashing/test_basic_composite_hasher.py +++ b/tests/test_hashing/test_basic_composite_hasher.py @@ -9,9 +9,10 @@ """ import json -import pytest from pathlib import Path +import pytest + from orcabridge.hashing.file_hashers import HasherFactory diff --git a/tests/test_hashing/test_basic_hashing.py b/tests/test_hashing/test_basic_hashing.py index c0a8f84..5ab355f 100644 --- a/tests/test_hashing/test_basic_hashing.py +++ b/tests/test_hashing/test_basic_hashing.py @@ -1,8 +1,8 @@ from orcabridge.hashing.core import ( + HashableMixin, hash_to_hex, hash_to_int, hash_to_uuid, - HashableMixin, stable_hash, ) diff --git a/tests/test_hashing/test_cached_file_hasher.py b/tests/test_hashing/test_cached_file_hasher.py index f147b2b..e8c3199 100644 --- a/tests/test_hashing/test_cached_file_hasher.py +++ b/tests/test_hashing/test_cached_file_hasher.py @@ -3,12 +3,13 @@ """Tests for CachedFileHasher implementation.""" import json -import pytest -from pathlib import Path -import tempfile import os +import tempfile +from pathlib import Path from unittest.mock import MagicMock +import pytest + from orcabridge.hashing.file_hashers import ( BasicFileHasher, CachedFileHasher, diff --git a/tests/test_hashing/test_composite_hasher.py b/tests/test_hashing/test_composite_hasher.py index 105716d..7ca2c25 100644 --- a/tests/test_hashing/test_composite_hasher.py +++ b/tests/test_hashing/test_composite_hasher.py @@ -2,12 +2,13 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_hashing/test_composite_hasher.py """Tests for the CompositeHasher implementation.""" -import pytest from unittest.mock import patch -from orcabridge.hashing.file_hashers import CompositeHasher, BasicFileHasher -from orcabridge.hashing.types import FileHasher, PathSetHasher, PacketHasher +import pytest + from orcabridge.hashing.core import hash_to_hex +from orcabridge.hashing.file_hashers import BasicFileHasher, CompositeHasher +from orcabridge.hashing.types import FileHasher, PacketHasher, PathSetHasher # Custom implementation of hash_file for tests that doesn't check for file existence @@ -22,9 +23,9 @@ def mock_hash_pathset( pathset, algorithm="sha256", buffer_size=65536, char_count=32, file_hasher=None ): """Mock implementation of hash_pathset that doesn't check for file existence.""" + from collections.abc import Collection from os import PathLike from pathlib import Path - from collections.abc import Collection # If file_hasher is None, we'll need to handle it differently if file_hasher is None: diff --git 
a/tests/test_hashing/test_file_hashes.py b/tests/test_hashing/test_file_hashes.py index 0e3da34..70ff814 100644 --- a/tests/test_hashing/test_file_hashes.py +++ b/tests/test_hashing/test_file_hashes.py @@ -8,9 +8,10 @@ """ import json -import pytest from pathlib import Path +import pytest + # Add the parent directory to the path to import orcabridge from orcabridge.hashing import hash_file diff --git a/tests/test_hashing/test_hash_samples.py b/tests/test_hashing/test_hash_samples.py index 8d4fb10..54fa32f 100644 --- a/tests/test_hashing/test_hash_samples.py +++ b/tests/test_hashing/test_hash_samples.py @@ -6,10 +6,12 @@ the hashing implementation remains stable over time. """ -import os import json -import pytest +import os from pathlib import Path + +import pytest + from orcabridge.hashing import hash_to_hex, hash_to_int, hash_to_uuid diff --git a/tests/test_hashing/test_hasher_factory.py b/tests/test_hashing/test_hasher_factory.py index eb9faf5..81631ab 100644 --- a/tests/test_hashing/test_hasher_factory.py +++ b/tests/test_hashing/test_hasher_factory.py @@ -9,7 +9,7 @@ CachedFileHasher, HasherFactory, ) -from orcabridge.hashing.string_cachers import InMemoryCacher, FileCacher +from orcabridge.hashing.string_cachers import FileCacher, InMemoryCacher class TestHasherFactoryCreateFileHasher: diff --git a/tests/test_hashing/test_hasher_parity.py b/tests/test_hashing/test_hasher_parity.py index 3d0a654..0ec700e 100644 --- a/tests/test_hashing/test_hasher_parity.py +++ b/tests/test_hashing/test_hasher_parity.py @@ -9,12 +9,13 @@ """ import json -import pytest -from pathlib import Path import random +from pathlib import Path + +import pytest +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.file_hashers import HasherFactory -from orcabridge.hashing.core import hash_file, hash_pathset, hash_packet def load_hash_lut(): diff --git a/tests/test_hashing/test_path_set_hasher.py b/tests/test_hashing/test_path_set_hasher.py index 574bddd..ed75b3d 100644 --- a/tests/test_hashing/test_path_set_hasher.py +++ b/tests/test_hashing/test_path_set_hasher.py @@ -2,15 +2,16 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_hashing/test_path_set_hasher.py """Tests for the PathSetHasher protocol implementation.""" -import pytest import os import tempfile from pathlib import Path from unittest.mock import patch +import pytest + +import orcabridge.hashing.core from orcabridge.hashing.file_hashers import DefaultPathsetHasher from orcabridge.hashing.types import FileHasher -import orcabridge.hashing.core class MockFileHasher(FileHasher): @@ -43,9 +44,10 @@ def mock_hash_pathset( pathset, algorithm="sha256", buffer_size=65536, char_count=32, file_hasher=None ): """Mock implementation of hash_pathset that doesn't check for file existence.""" - from orcabridge.hashing.core import hash_to_hex - from os import PathLike from collections.abc import Collection + from os import PathLike + + from orcabridge.hashing.core import hash_to_hex from orcabridge.utils.name import find_noncolliding_name # If file_hasher is None, we'll need to handle it differently diff --git a/tests/test_hashing/test_pathset_and_packet.py b/tests/test_hashing/test_pathset_and_packet.py index 6e0410e..91efbc7 100644 --- a/tests/test_hashing/test_pathset_and_packet.py +++ b/tests/test_hashing/test_pathset_and_packet.py @@ -7,13 +7,14 @@ functions with various input types and configurations. 
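
For context on the API these import-order fixes touch (a sketch only; exact signatures live in orcabridge.hashing, and the paths below are placeholders): hash_file digests a single file, hash_pathset digests a file, directory, or collection of paths, and hash_packet digests a mapping of keys to pathsets.

from orcabridge.hashing import hash_file, hash_packet, hash_pathset

h1 = hash_file("examples/dataset2/day1.json")
h2 = hash_pathset(["examples/dataset2/day1.json", "examples/dataset2/day2.json"])
h3 = hash_packet({"json_file": "examples/dataset2/day1.json"})
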
""" +import logging import os -import pytest import tempfile from pathlib import Path -import logging -from orcabridge.hashing import hash_pathset, hash_packet, hash_file +import pytest + +from orcabridge.hashing import hash_file, hash_packet, hash_pathset logger = logging.getLogger(__name__) diff --git a/tests/test_hashing/test_pathset_packet_hashes.py b/tests/test_hashing/test_pathset_packet_hashes.py index 548cc9a..9f31f00 100644 --- a/tests/test_hashing/test_pathset_packet_hashes.py +++ b/tests/test_hashing/test_pathset_packet_hashes.py @@ -8,11 +8,12 @@ """ import json -import pytest from pathlib import Path +import pytest + # Add the parent directory to the path to import orcabridge -from orcabridge.hashing import hash_pathset, hash_packet +from orcabridge.hashing import hash_packet, hash_pathset def load_pathset_hash_lut(): diff --git a/tests/test_hashing/test_process_structure.py b/tests/test_hashing/test_process_structure.py index e145294..24b3b08 100644 --- a/tests/test_hashing/test_process_structure.py +++ b/tests/test_hashing/test_process_structure.py @@ -1,9 +1,9 @@ -from typing import Any import uuid -from collections import namedtuple, OrderedDict +from collections import OrderedDict, namedtuple from pathlib import Path +from typing import Any -from orcabridge.hashing.core import HashableMixin, process_structure, hash_to_hex +from orcabridge.hashing.core import HashableMixin, hash_to_hex, process_structure # Define a simple HashableMixin class for testing diff --git a/tests/test_hashing/test_sqlite_cacher.py b/tests/test_hashing/test_sqlite_cacher.py index 898a7f3..99a8030 100644 --- a/tests/test_hashing/test_sqlite_cacher.py +++ b/tests/test_hashing/test_sqlite_cacher.py @@ -5,7 +5,8 @@ import threading import time from pathlib import Path -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + from orcabridge.hashing.string_cachers import SQLiteCacher diff --git a/tests/test_hashing/test_string_cacher/test_file_cacher.py b/tests/test_hashing/test_string_cacher/test_file_cacher.py index 20e8057..223fcf8 100644 --- a/tests/test_hashing/test_string_cacher/test_file_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_file_cacher.py @@ -4,7 +4,8 @@ import tempfile import threading from pathlib import Path -from unittest.mock import patch, mock_open +from unittest.mock import mock_open, patch + from orcabridge.hashing.string_cachers import FileCacher diff --git a/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py b/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py index 8dcf7b0..5e76e44 100644 --- a/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py @@ -2,6 +2,7 @@ import threading import time + from orcabridge.hashing.string_cachers import InMemoryCacher diff --git a/tests/test_hashing/test_string_cacher/test_redis_cacher.py b/tests/test_hashing/test_string_cacher/test_redis_cacher.py index 6477921..ac04b82 100644 --- a/tests/test_hashing/test_string_cacher/test_redis_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_redis_cacher.py @@ -1,7 +1,10 @@ """Tests for RedisCacher using mocked Redis.""" -import pytest +from typing import cast from unittest.mock import patch + +import pytest + from orcabridge.hashing.string_cachers import RedisCacher diff --git a/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py b/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py index bb8eab2..9204543 100644 --- 
a/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py @@ -5,7 +5,8 @@ import threading import time from pathlib import Path -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + from orcabridge.hashing.string_cachers import SQLiteCacher diff --git a/tests/test_hashing/test_string_cachers.py b/tests/test_hashing/test_string_cachers.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_store/conftest.py b/tests/test_store/conftest.py index 7f157e6..77ca9f9 100644 --- a/tests/test_store/conftest.py +++ b/tests/test_store/conftest.py @@ -2,11 +2,12 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/conftest.py """Common test fixtures for store tests.""" -import pytest -import tempfile import shutil +import tempfile from pathlib import Path +import pytest + @pytest.fixture def temp_dir(): diff --git a/tests/test_store/test_dir_data_store.py b/tests/test_store/test_dir_data_store.py index 7f61b01..37e467c 100644 --- a/tests/test_store/test_dir_data_store.py +++ b/tests/test_store/test_dir_data_store.py @@ -2,18 +2,19 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_dir_data_store.py """Tests for DirDataStore.""" -import pytest import json import shutil from pathlib import Path -from orcabridge.store.dir_data_store import DirDataStore +import pytest + from orcabridge.hashing.types import ( + CompositeFileHasher, FileHasher, - PathSetHasher, PacketHasher, - CompositeFileHasher, + PathSetHasher, ) +from orcabridge.store.core import DirDataStore class MockFileHasher(FileHasher): @@ -447,7 +448,7 @@ def test_dir_data_store_clear_all_stores(temp_dir, sample_files): assert (store_dir / "store2").exists() # Clear all stores with force and non-interactive mode - store.clear_all_stores(interactive=False, store_name=str(store_dir), force=True) + store.clear_all_stores(interactive=False, function_name=str(store_dir), force=True) # Check that the entire store directory was deleted assert not store_dir.exists() diff --git a/tests/test_store/test_integration.py b/tests/test_store/test_integration.py index 9efc8f3..22c67c9 100644 --- a/tests/test_store/test_integration.py +++ b/tests/test_store/test_integration.py @@ -2,17 +2,18 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_integration.py """Integration tests for the store module.""" -import pytest import os from pathlib import Path -from orcabridge.store.dir_data_store import DirDataStore, NoOpDataStore +import pytest + from orcabridge.hashing.file_hashers import ( BasicFileHasher, CachedFileHasher, CompositeHasher, ) from orcabridge.hashing.string_cachers import InMemoryCacher +from orcabridge.store.core import DirDataStore, NoOpDataStore def test_integration_with_cached_file_hasher(temp_dir, sample_files): @@ -82,10 +83,10 @@ def test_integration_data_store_chain(temp_dir, sample_files): store2.memoize("test_chain", "content_hash_456", packet2, output_packet2) # Create a function that tries each store in sequence - def retrieve_from_stores(store_name, content_hash, packet): + def retrieve_from_stores(function_name, content_hash, packet): for store in [store1, store2, store3]: try: - result = store.retrieve_memoized(store_name, content_hash, packet) + result = store.retrieve_memoized(function_name, content_hash, packet) if result is not None: return result except FileNotFoundError: @@ -113,11 +114,11 @@ def retrieve_from_stores(store_name, content_hash, packet): # 
without actually trying to hash nonexistent files original_retrieve = store1.retrieve_memoized - def mocked_retrieve(store_name, content_hash, packet): + def mocked_retrieve(function_name, content_hash, packet): # Only return None for our specific test case - if store_name == "test_chain" and content_hash == "content_hash_789": + if function_name == "test_chain" and content_hash == "content_hash_789": return None - return original_retrieve(store_name, content_hash, packet) + return original_retrieve(function_name, content_hash, packet) # Apply the mock to all stores store1.retrieve_memoized = mocked_retrieve diff --git a/tests/test_store/test_noop_data_store.py b/tests/test_store/test_noop_data_store.py index 80ffd24..8f160d1 100644 --- a/tests/test_store/test_noop_data_store.py +++ b/tests/test_store/test_noop_data_store.py @@ -3,7 +3,8 @@ """Tests for NoOpDataStore.""" import pytest -from orcabridge.store.dir_data_store import NoOpDataStore + +from orcabridge.store.core import NoOpDataStore def test_noop_data_store_memoize(): @@ -43,7 +44,7 @@ def test_noop_data_store_retrieve_memoized(): def test_noop_data_store_is_data_store_subclass(): """Test that NoOpDataStore is a subclass of DataStore.""" - from orcabridge.store.dir_data_store import DataStore + from orcabridge.store.core import DataStore store = NoOpDataStore() assert isinstance(store, DataStore) diff --git a/tests/test_store/test_transfer_data_store.py b/tests/test_store/test_transfer_data_store.py new file mode 100644 index 0000000..ddb1d09 --- /dev/null +++ b/tests/test_store/test_transfer_data_store.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python +# filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_transfer_data_store.py +"""Tests for TransferDataStore.""" + +import json +from pathlib import Path + +import pytest + +from orcabridge.hashing.types import PacketHasher +from orcabridge.store.core import DirDataStore, NoOpDataStore +from orcabridge.store.transfer import TransferDataStore + + +class MockPacketHasher(PacketHasher): + """Mock PacketHasher for testing.""" + + def __init__(self, hash_value="mock_hash"): + self.hash_value = hash_value + self.packet_hash_calls = [] + + def hash_packet(self, packet): + self.packet_hash_calls.append(packet) + return f"{self.hash_value}_packet" + + +def test_transfer_data_store_basic_setup(temp_dir, sample_files): + """Test basic setup of TransferDataStore.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Verify the stores are set correctly + assert transfer_store.source_store is source_store + assert transfer_store.target_store is target_store + + +def test_transfer_data_store_memoize_to_target(temp_dir, sample_files): + """Test that memoize stores packets in the target store.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Memoize through transfer store + result = 
transfer_store.memoize( + "test_store", "content_hash_123", packet, output_packet + ) + + # Verify the packet was stored in target store + assert "output_file" in result + + # Verify we can retrieve it directly from target store + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + assert "output_file" in retrieved_from_target + + # Verify it's NOT in the source store + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is None + + +def test_transfer_data_store_retrieve_from_target_first(temp_dir, sample_files): + """Test that retrieve_memoized checks target store first.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store directly in target store + target_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Retrieve through transfer store should find it in target + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + +def test_transfer_data_store_fallback_to_source_and_copy(temp_dir, sample_files): + """Test that retrieve_memoized falls back to source store and copies to target.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store only in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Verify it's not in target initially + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is None + + # Retrieve through transfer store should find it in source and copy to target + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + # Now verify it was copied to target store + retrieved_from_target_after = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target_after is not None + assert "output_file" in retrieved_from_target_after + + +def test_transfer_data_store_multiple_packets(temp_dir, sample_files): + """Test transfer functionality with multiple packets.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create multiple packets + packets = [ + {"input_file": 
sample_files["input"]["file1"]}, + {"input_file": sample_files["input"]["file2"]}, + ] + + output_packets = [ + {"output_file": sample_files["output"]["output1"]}, + {"output_file": sample_files["output"]["output2"]}, + ] + + content_hashes = ["content_hash_1", "content_hash_2"] + + # Store all packets in source store + for i, (packet, output_packet, content_hash) in enumerate( + zip(packets, output_packets, content_hashes) + ): + source_store.memoize("test_store", content_hash, packet, output_packet) + + # Verify none are in target initially + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is None + + # Retrieve all packets through transfer store + results = [] + for packet, content_hash in zip(packets, content_hashes): + result = transfer_store.retrieve_memoized("test_store", content_hash, packet) + assert result is not None + results.append(result) + + # Verify all packets are now in target store + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is not None + assert "output_file" in retrieved + + +def test_transfer_data_store_explicit_transfer_method(temp_dir, sample_files): + """Test the explicit transfer method.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Use explicit transfer method + result = transfer_store.transfer("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + # Verify it's now in target store + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + +def test_transfer_data_store_transfer_method_not_found(temp_dir, sample_files): + """Test transfer method raises error when packet not found in source.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet + packet = {"input_file": sample_files["input"]["file1"]} + + # Try to transfer packet that doesn't exist + with pytest.raises(ValueError, match="Packet not found in source store"): + transfer_store.transfer("test_store", "nonexistent_hash", packet) + + +def test_transfer_data_store_retrieve_nonexistent_packet(temp_dir, sample_files): + """Test retrieve_memoized returns None for nonexistent packets.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet 
+ packet = {"input_file": sample_files["input"]["file1"]} + + # Try to retrieve nonexistent packet + result = transfer_store.retrieve_memoized("test_store", "nonexistent_hash", packet) + assert result is None + + +def test_transfer_data_store_different_file_hashers(temp_dir, sample_files): + """Test transfer between stores with different file hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Verify it's in source store using source hasher + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is not None + + # Transfer through transfer store - this should work despite different hashers + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + assert result is not None + assert "output_file" in result + + # Verify it's now in target store using target hasher + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + # Verify both hashers were called + assert len(source_hasher.packet_hash_calls) > 0 + assert len(target_hasher.packet_hash_calls) > 0 + + +def test_transfer_data_store_memoize_new_packet_with_different_hashers( + temp_dir, sample_files +): + """Test memoizing new packets when source and target have different hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Memoize through transfer store (should go to target) + result = transfer_store.memoize( + "test_store", "content_hash_123", packet, output_packet + ) + + assert result is not None + assert "output_file" in result + + # Verify it's only in target store, not source + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is None + + # Verify target hasher was used for memoization + assert len(target_hasher.packet_hash_calls) > 0 + + +def 
test_transfer_data_store_complex_transfer_scenario(temp_dir, sample_files): + """Test complex scenario with multiple operations and different hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create multiple packets + packets = [ + {"input_file": sample_files["input"]["file1"]}, + {"input_file": sample_files["input"]["file2"]}, + ] + + output_packets = [ + {"output_file": sample_files["output"]["output1"]}, + {"output_file": sample_files["output"]["output2"]}, + ] + + content_hashes = ["content_hash_1", "content_hash_2"] + + # 1. Store first packet directly in source + source_store.memoize("test_store", content_hashes[0], packets[0], output_packets[0]) + + # 2. Store second packet through transfer store (should go to target) + transfer_store.memoize( + "test_store", content_hashes[1], packets[1], output_packets[1] + ) + + # 3. Retrieve first packet through transfer store (should copy from source to target) + result1 = transfer_store.retrieve_memoized( + "test_store", content_hashes[0], packets[0] + ) + assert result1 is not None + + # 4. Retrieve second packet through transfer store (should find in target directly) + result2 = transfer_store.retrieve_memoized( + "test_store", content_hashes[1], packets[1] + ) + assert result2 is not None + + # 5. Verify both packets are now in target store + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is not None + assert "output_file" in retrieved + + # 6. 
Verify first packet is still in source, second is not + retrieved_source_1 = source_store.retrieve_memoized( + "test_store", content_hashes[0], packets[0] + ) + assert retrieved_source_1 is not None + + retrieved_source_2 = source_store.retrieve_memoized( + "test_store", content_hashes[1], packets[1] + ) + assert retrieved_source_2 is None + + +def test_transfer_data_store_with_noop_stores(temp_dir, sample_files): + """Test transfer store behavior with NoOpDataStore.""" + # Test with NoOp as source + noop_source = NoOpDataStore() + target_store_dir = Path(temp_dir) / "target_store" + target_store = DirDataStore(store_dir=target_store_dir) + + transfer_store = TransferDataStore( + source_store=noop_source, target_store=target_store + ) + + packet = {"input": sample_files["input"]["file1"]} + + # Should return None since NoOp store doesn't store anything + result = transfer_store.retrieve_memoized("test_store", "hash123", packet) + assert result is None + + # Test with NoOp as target + source_store_dir = Path(temp_dir) / "source_store" + source_store = DirDataStore(store_dir=source_store_dir) + noop_target = NoOpDataStore() + + transfer_store2 = TransferDataStore( + source_store=source_store, target_store=noop_target + ) + + output_packet = {"output": sample_files["output"]["output1"]} + + # Memoize should work (goes to target which is NoOp) + result = transfer_store2.memoize("test_store", "hash123", packet, output_packet) + assert result == output_packet # NoOp just returns the output packet + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/test_streams_operations/__init__.py b/tests/test_streams_operations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/conftest.py b/tests/test_streams_operations/conftest.py new file mode 100644 index 0000000..b6420a3 --- /dev/null +++ b/tests/test_streams_operations/conftest.py @@ -0,0 +1,204 @@ +""" +Shared fixtures for streams and operations testing. 
+""" + +import tempfile +import json +import numpy as np +from pathlib import Path +from typing import Any, Iterator +import pytest + +from orcabridge.types import Tag, Packet +from orcabridge.streams import SyncStreamFromLists +from orcabridge.store import DirDataStore + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +@pytest.fixture +def sample_tags(): + """Sample tags for testing.""" + return [ + {"file_name": "day1", "session": "morning"}, + {"file_name": "day2", "session": "afternoon"}, + {"file_name": "day3", "session": "evening"}, + ] + + +@pytest.fixture +def sample_packets(): + """Sample packets for testing.""" + return [ + {"txt_file": "data/day1.txt", "metadata": "meta1.json"}, + {"txt_file": "data/day2.txt", "metadata": "meta2.json"}, + {"txt_file": "data/day3.txt", "metadata": "meta3.json"}, + ] + + +@pytest.fixture +def sample_stream(sample_tags, sample_packets): + """Create a sample stream from tags and packets.""" + return SyncStreamFromLists( + tags=sample_tags, + packets=sample_packets, + tag_keys=["file_name", "session"], + packet_keys=["txt_file", "metadata"], + ) + + +@pytest.fixture +def empty_stream() -> SyncStreamFromLists: + """Create an empty stream.""" + return SyncStreamFromLists(paired=[]) + + +@pytest.fixture +def single_item_stream() -> SyncStreamFromLists: + """Create a stream with a single item.""" + return SyncStreamFromLists(tags=[{"name": "single"}], packets=[{"data": "value"}]) + + +@pytest.fixture +def test_files(temp_dir) -> dict[str, Any]: + """Create test files for source testing.""" + # Create text files + txt_dir = temp_dir / "txt_files" + txt_dir.mkdir() + + txt_files = [] + for i, day in enumerate(["day1", "day2", "day3"], 1): + txt_file = txt_dir / f"{day}.txt" + txt_file.write_text(f"Content for {day}\n" * (i * 5)) + txt_files.append(txt_file) + + # Create binary files with numpy arrays + bin_dir = temp_dir / "bin_files" + bin_dir.mkdir() + + bin_files = [] + for i, session in enumerate(["session_day1", "session_day2"], 1): + bin_file = bin_dir / f"{session}.bin" + data = np.random.rand(10 * i).astype(np.float64) + bin_file.write_bytes(data.tobytes()) + bin_files.append(bin_file) + + # Create json files + json_dir = temp_dir / "json_files" + json_dir.mkdir() + + json_files = [] + for i, info in enumerate(["info_day1", "info_day2"], 1): + json_file = json_dir / f"{info}.json" + data = {"lines": i * 5, "day": f"day{i}", "processed": False} + json_file.write_text(json.dumps(data)) + json_files.append(json_file) + + return { + "txt_dir": txt_dir, + "txt_files": txt_files, + "bin_dir": bin_dir, + "bin_files": bin_files, + "json_dir": json_dir, + "json_files": json_files, + } + + +@pytest.fixture +def data_store(temp_dir) -> DirDataStore: + """Create a test data store.""" + store_dir = temp_dir / "data_store" + return DirDataStore(store_dir=store_dir) + + +# Sample functions for FunctionPod testing + + +def sample_function_no_output(input_file: str) -> None: + """Sample function that takes input but returns nothing.""" + pass + + +def sample_function_single_output(input_file: str) -> str: + """Sample function that returns a single output.""" + return str(Path(input_file).with_suffix(".processed")) + + +def sample_function_multiple_outputs(input_file: str) -> tuple[str, str]: + """Sample function that returns multiple outputs.""" + base = Path(input_file).stem + return f"{base}_output1.txt", f"{base}_output2.txt" + + +def 
sample_function_with_error(input_file: str) -> str: + """Sample function that raises an error.""" + raise ValueError("Intentional error for testing") + + +def count_lines_function(txt_file: str) -> int: + """Function that counts lines in a text file.""" + with open(txt_file, "r") as f: + return len(f.readlines()) + + +def compute_stats_function(bin_file: str, temp_dir: str | None = None) -> str: + """Function that computes statistics on binary data.""" + import tempfile + + with open(bin_file, "rb") as f: + data = np.frombuffer(f.read(), dtype=np.float64) + + stats = { + "mean": float(np.mean(data)), + "std": float(np.std(data)), + "min": float(np.min(data)), + "max": float(np.max(data)), + "count": len(data), + } + + if temp_dir is None: + output_file = Path(tempfile.mkdtemp()) / "stats.json" + else: + output_file = Path(temp_dir) / "stats.json" + + with open(output_file, "w") as f: + json.dump(stats, f) + + return str(output_file) + + +# Predicate functions for Filter testing + + +def filter_by_session_morning(tag: Tag, packet: Packet) -> bool: + """Filter predicate that keeps only morning sessions.""" + return tag.get("session") == "morning" + + +def filter_by_filename_pattern(tag: Tag, packet: Packet) -> bool: + """Filter predicate that keeps files matching a pattern.""" + return "day1" in tag.get("file_name", "") # type: ignore + + +# Transform functions + + +def transform_add_prefix(tag: Tag, packet: Packet) -> tuple[Tag, Packet]: + """Transform that adds prefix to file_name tag.""" + new_tag = tag.copy() + if "file_name" in new_tag: + new_tag["file_name"] = f"prefix_{new_tag['file_name']}" + return new_tag, packet + + +def transform_rename_keys(tag: Tag, packet: Packet) -> tuple[Tag, Packet]: + """Transform that renames packet keys.""" + new_packet = packet.copy() + if "txt_file" in new_packet: + new_packet["content"] = new_packet.pop("txt_file") + return tag, new_packet diff --git a/tests/test_streams_operations/test_mappers/__init__.py b/tests/test_streams_operations/test_mappers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_mappers/test_batch.py b/tests/test_streams_operations/test_mappers/test_batch.py new file mode 100644 index 0000000..b30701e --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_batch.py @@ -0,0 +1,290 @@ +"""Tests for Batch mapper functionality.""" + +import pytest +from orcabridge.mappers import Batch +from orcabridge.streams import SyncStreamFromLists + + +class TestBatch: + """Test cases for Batch mapper.""" + + def test_batch_basic(self, sample_tags, sample_packets): + """Test basic batch functionality.""" + stream = SyncStreamFromLists(sample_tags, sample_packets) + batch = Batch(2, drop_last=False) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 2 batches: [packet1, packet2] and [packet3] + assert len(result) == 2 + + batch1_tag, batch1_packet = result[0] + batch2_tag, batch2_packet = result[1] + + # First batch should have 2 items + assert len(batch1_packet["txt_file"]) == 2 + for k, v in batch1_packet.items(): + assert v == [p[k] for p in sample_packets[:2]] + + assert len(batch2_packet["txt_file"]) == 1 + for k, v in batch2_packet.items(): + assert v == [p[k] for p in sample_packets[2:]] + + def test_batch_exact_division(self): + """Test batch when stream length divides evenly by batch size.""" + packets = [1, 2, 3, 4, 5, 6] + tags = ["a", "b", "c", "d", "e", "f"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(3) + 
batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 2 batches + assert len(result) == 2 + + batch1_packet, _ = result[0] + batch2_packet, _ = result[1] + + assert len(batch1_packet) == 3 + assert len(batch2_packet) == 3 + assert list(batch1_packet) == [1, 2, 3] + assert list(batch2_packet) == [4, 5, 6] + + def test_batch_size_one(self, sample_packets, sample_tags): + """Test batch with size 1.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(1) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have same number of batches as original packets + assert len(result) == len(sample_packets) + + for i, (batch_packet, batch_tag) in enumerate(result): + assert len(batch_packet) == 1 + assert list(batch_packet) == [sample_packets[i]] + + def test_batch_larger_than_stream(self, sample_packets, sample_tags): + """Test batch size larger than stream.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(10) # Larger than sample_packets length + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 1 batch with all packets + assert len(result) == 1 + + batch_packet, batch_tag = result[0] + assert len(batch_packet) == len(sample_packets) + assert list(batch_packet) == sample_packets + + def test_batch_empty_stream(self): + """Test batch with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + batch = Batch(3) + batched_stream = batch(empty_stream) + + result = list(batched_stream) + assert len(result) == 0 + + def test_batch_preserves_packet_types(self): + """Test that batch preserves different packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 3 batches: [2, 2, 1] + assert len(result) == 3 + + # Check first batch + batch1_packet, _ = result[0] + batch1_list = list(batch1_packet) + assert batch1_list[0] == PacketType("data1") + assert batch1_list[1] == {"key": "value"} + + # Check second batch + batch2_packet, _ = result[1] + batch2_list = list(batch2_packet) + assert batch2_list[0] == [1, 2, 3] + assert batch2_list[1] == 42 + + # Check third batch + batch3_packet, _ = result[2] + batch3_list = list(batch3_packet) + assert batch3_list[0] == "string" + + def test_batch_tag_handling(self, sample_packets, sample_tags): + """Test how batch handles tags.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Each batch should have some representation of the constituent tags + for batch_packet, batch_tag in result: + assert batch_tag is not None + # The exact format depends on implementation + + def test_batch_maintains_order(self): + """Test that batch maintains packet order within batches.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(3) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 4 batches: [3, 3, 3, 1] + assert len(result) == 4 + + # Check order within each batch + all_packets = [] + for batch_packet, _ in result: + all_packets.extend(list(batch_packet)) + + assert all_packets == packets + + def test_batch_large_stream(self): + """Test batch with 
large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(50) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 20 batches of 50 each + assert len(result) == 20 + + for i, (batch_packet, _) in enumerate(result): + assert len(batch_packet) == 50 + expected_packets = packets[i * 50 : (i + 1) * 50] + assert list(batch_packet) == expected_packets + + def test_batch_invalid_size(self): + """Test batch with invalid size.""" + with pytest.raises(ValueError): + Batch(0) + + with pytest.raises(ValueError): + Batch(-1) + + with pytest.raises(TypeError): + Batch(3.5) + + with pytest.raises(TypeError): + Batch("3") + + def test_batch_chaining(self, sample_packets, sample_tags): + """Test chaining batch operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First batch: size 2 + batch1 = Batch(2) + stream1 = batch1(stream) + + # Second batch: size 1 (batch the batches) + batch2 = Batch(1) + stream2 = batch2(stream1) + + result = list(stream2) + + # Each item should be a batch containing a single batch + for batch_packet, _ in result: + assert len(batch_packet) == 1 + # The contained item should itself be a batch + + def test_batch_with_generator_stream(self): + """Test batch with generator-based stream.""" + + def packet_generator(): + for i in range(7): + yield f"packet_{i}", f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + batch = Batch(3) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 3 batches: [3, 3, 1] + assert len(result) == 3 + + batch1_packet, _ = result[0] + batch2_packet, _ = result[1] + batch3_packet, _ = result[2] + + assert len(batch1_packet) == 3 + assert len(batch2_packet) == 3 + assert len(batch3_packet) == 1 + + def test_batch_memory_efficiency(self): + """Test that batch doesn't consume excessive memory.""" + # Create a large stream + packets = [f"packet_{i}" for i in range(10000)] + tags = [f"tag_{i}" for i in range(10000)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(100) + batched_stream = batch(stream) + + # Process one batch at a time to test memory efficiency + batch_count = 0 + for batch_packet, _ in batched_stream: + batch_count += 1 + assert len(batch_packet) <= 100 + if batch_count == 50: # Stop early to avoid processing everything + break + + assert batch_count == 50 + + def test_batch_with_none_packets(self): + """Test batch with None packets.""" + packets = [1, None, 3, None, 5, None] + tags = ["num1", "null1", "num3", "null2", "num5", "null3"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + assert len(result) == 3 + + # Check that None values are preserved + all_packets = [] + for batch_packet, _ in result: + all_packets.extend(list(batch_packet)) + + assert all_packets == packets + + def test_batch_pickle(self): + """Test that Batch mapper is pickleable.""" + import pickle + from orcabridge.mappers import Batch + + batch = Batch(batch_size=3) + pickled = pickle.dumps(batch) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Batch) + assert unpickled.batch_size == batch.batch_size diff --git a/tests/test_streams_operations/test_mappers/test_cache_stream.py 
b/tests/test_streams_operations/test_mappers/test_cache_stream.py new file mode 100644 index 0000000..feefb61 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_cache_stream.py @@ -0,0 +1,299 @@ +""" +Test module for CacheStream mapper. + +This module tests the CacheStream mapper functionality, which provides +caching capabilities to avoid upstream recomputation by storing stream data +in memory after the first iteration. +""" + +import pytest +from unittest.mock import Mock + +from orcabridge.base import SyncStream +from orcabridge.mapper import CacheStream +from orcabridge.stream import SyncStreamFromLists + + +@pytest.fixture +def cache_mapper(): + """Create a CacheStream mapper instance.""" + return CacheStream() + + +@pytest.fixture +def sample_stream_data(): + """Sample stream data for testing.""" + return [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + +@pytest.fixture +def sample_stream(sample_stream_data): + """Create a sample stream.""" + tags, packets = zip(*sample_stream_data) + return SyncStreamFromLists(list(tags), list(packets)) + + +class TestCacheStream: + """Test cases for CacheStream mapper.""" + + def test_cache_initialization(self, cache_mapper): + """Test that CacheStream initializes with empty cache.""" + assert cache_mapper.cache == [] + assert cache_mapper.is_cached is False + + def test_repr(self, cache_mapper): + """Test CacheStream string representation.""" + assert repr(cache_mapper) == "CacheStream(active:False)" + + # After caching + cache_mapper.is_cached = True + assert repr(cache_mapper) == "CacheStream(active:True)" + + def test_first_iteration_caches_data(self, cache_mapper, sample_stream): + """Test that first iteration through stream caches the data.""" + cached_stream = cache_mapper(sample_stream) + + # Initially not cached + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + # Iterate through stream + result = list(cached_stream) + + # After iteration, should be cached + assert cache_mapper.is_cached + assert len(cache_mapper.cache) == 3 + assert cache_mapper.cache == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + # Result should match original stream + assert result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_subsequent_iterations_use_cache(self, cache_mapper, sample_stream): + """Test that subsequent iterations use cached data.""" + cached_stream = cache_mapper(sample_stream) + + # First iteration + first_result = list(cached_stream) + assert cache_mapper.is_cached + + # Create new stream from same mapper (simulates reuse) + second_cached_stream = cache_mapper() # No input streams for cached version + second_result = list(second_cached_stream) + + # Results should be identical + assert first_result == second_result + assert second_result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_clear_cache(self, cache_mapper, sample_stream): + """Test cache clearing functionality.""" + cached_stream = cache_mapper(sample_stream) + + # Cache some data + list(cached_stream) + assert cache_mapper.is_cached + assert len(cache_mapper.cache) > 0 + + # Clear cache + cache_mapper.clear_cache() + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + def test_multiple_streams_error_when_not_cached(self, cache_mapper, sample_stream): + """Test that providing multiple 
streams raises error when not cached.""" + stream2 = SyncStreamFromLists([{"id": 4}], [{"value": 40}]) + + with pytest.raises( + ValueError, match="CacheStream operation requires exactly one stream" + ): + cache_mapper(sample_stream, stream2) + + def test_no_streams_when_cached(self, cache_mapper, sample_stream): + """Test that cached stream can be called without input streams.""" + # First, cache some data + cached_stream = cache_mapper(sample_stream) + list(cached_stream) # This caches the data + + # Now call without streams (should use cache) + cached_only_stream = cache_mapper() + result = list(cached_only_stream) + + assert result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_empty_stream_caching(self, cache_mapper): + """Test caching behavior with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + cached_stream = cache_mapper(empty_stream) + + result = list(cached_stream) + + assert result == [] + assert cache_mapper.is_cached + assert cache_mapper.cache == [] + + def test_identity_structure(self, cache_mapper, sample_stream): + """Test that CacheStream has unique identity structure.""" + # CacheStream should return None for identity structure + # to treat every instance as different + assert cache_mapper.identity_structure(sample_stream) is None + + def test_avoids_upstream_recomputation(self, cache_mapper): + """Test that CacheStream avoids upstream recomputation.""" + # Create a mock stream that tracks how many times it's iterated + iteration_count = {"count": 0} + + def counting_generator(): + iteration_count["count"] += 1 + yield ({"id": 1}, {"value": 10}) + yield ({"id": 2}, {"value": 20}) + + mock_stream = Mock(spec=SyncStream) + mock_stream.__iter__ = counting_generator + + cached_stream = cache_mapper(mock_stream) + + # First iteration should call upstream + list(cached_stream) + assert iteration_count["count"] == 1 + + # Second iteration should use cache (not call upstream) + second_cached_stream = cache_mapper() + list(second_cached_stream) + assert iteration_count["count"] == 1 # Should still be 1 + + def test_cache_with_different_data_types(self, cache_mapper): + """Test caching with various data types.""" + complex_data = [ + ({"id": 1, "type": "string"}, {"data": "hello", "numbers": [1, 2, 3]}), + ({"id": 2, "type": "dict"}, {"data": {"nested": True}, "numbers": None}), + ({"id": 3, "type": "boolean"}, {"data": True, "numbers": 42}), + ] + + tags, packets = zip(*complex_data) + stream = SyncStreamFromLists(list(tags), list(packets)) + cached_stream = cache_mapper(stream) + + result = list(cached_stream) + + assert result == complex_data + assert cache_mapper.is_cached + assert cache_mapper.cache == complex_data + + def test_multiple_cache_instances(self, sample_stream): + """Test that different CacheStream instances have separate caches.""" + cache1 = CacheStream() + cache2 = CacheStream() + + # Cache in first instance + cached_stream1 = cache1(sample_stream) + list(cached_stream1) + + # Second instance should not be cached + assert cache1.is_cached + assert not cache2.is_cached + assert len(cache1.cache) == 3 + assert len(cache2.cache) == 0 + + def test_keys_method(self, cache_mapper, sample_stream): + """Test that CacheStream passes through keys correctly.""" + # CacheStream should inherit keys from input stream + tag_keys, packet_keys = cache_mapper.keys(sample_stream) + original_tag_keys, original_packet_keys = sample_stream.keys() + + assert tag_keys == original_tag_keys + assert 
packet_keys == original_packet_keys + + def test_chaining_with_cache(self, cache_mapper, sample_stream): + """Test chaining CacheStream with other operations.""" + from orcabridge.mapper import Filter + + # Chain cache with filter + filter_mapper = Filter(lambda tag, packet: tag["id"] > 1) + + # Cache first, then filter + cached_stream = cache_mapper(sample_stream) + filtered_stream = filter_mapper(cached_stream) + + result = list(filtered_stream) + + assert len(result) == 2 # Should have filtered out id=1 + assert result == [ + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + # Cache should still be populated with original data + assert cache_mapper.is_cached + assert len(cache_mapper.cache) == 3 + + def test_cache_persistence_across_multiple_outputs( + self, cache_mapper, sample_stream + ): + """Test that cache persists when creating multiple output streams.""" + # First stream + stream1 = cache_mapper(sample_stream) + result1 = list(stream1) + + # Second stream from same cache + stream2 = cache_mapper() + result2 = list(stream2) + + # Third stream from same cache + stream3 = cache_mapper() + result3 = list(stream3) + + # All results should be identical + assert result1 == result2 == result3 + assert len(result1) == 3 + + def test_error_handling_during_caching(self, cache_mapper): + """Test error handling when upstream stream raises exception.""" + + def error_generator(): + yield ({"id": 1}, {"value": 10}) + raise ValueError("Upstream error") + + mock_stream = Mock(spec=SyncStream) + mock_stream.__iter__ = error_generator + + cached_stream = cache_mapper(mock_stream) + + # Should propagate the error and not cache partial data + with pytest.raises(ValueError, match="Upstream error"): + list(cached_stream) + + # Cache should remain empty after error + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + def test_cache_stream_pickle(self): + """Test that CacheStream mapper is pickleable.""" + import pickle + from orcabridge.mappers import CacheStream + + cache_stream = CacheStream() + pickled = pickle.dumps(cache_stream) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, CacheStream) + assert unpickled.__class__.__name__ == "CacheStream" diff --git a/tests/test_streams_operations/test_mappers/test_default_tag.py b/tests/test_streams_operations/test_mappers/test_default_tag.py new file mode 100644 index 0000000..281002b --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_default_tag.py @@ -0,0 +1,260 @@ +"""Tests for DefaultTag mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import DefaultTag +from orcabridge.stream import SyncStreamFromLists + + +class TestDefaultTag: + """Test cases for DefaultTag mapper.""" + + def test_default_tag_basic(self, sample_packets): + """Test basic default tag functionality.""" + tags = ["existing1", None, "existing2"] + + stream = SyncStreamFromLists(sample_packets, tags) + default_tag = DefaultTag("default_value") + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = ["existing1", "default_value", "existing2"] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_default_tag_all_none(self, sample_packets): + """Test default tag when all tags are None.""" + tags = [None, None, None] + + stream = 
SyncStreamFromLists(sample_packets, tags) + default_tag = DefaultTag("fallback") + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = ["fallback", "fallback", "fallback"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_default_tag_no_none(self, sample_packets, sample_tags): + """Test default tag when no tags are None.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + default_tag = DefaultTag("unused_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + # Should remain unchanged + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_default_tag_empty_stream(self): + """Test default tag with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + default_tag = DefaultTag("default") + result_stream = default_tag(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_default_tag_different_types(self): + """Test default tag with different default value types.""" + packets = ["data1", "data2", "data3"] + tags = [None, "existing", None] + + # Test with string default + stream1 = SyncStreamFromLists(packets, tags) + default_tag1 = DefaultTag("string_default") + result1 = list(default_tag1(stream1)) + + expected_tags1 = ["string_default", "existing", "string_default"] + actual_tags1 = [tag for _, tag in result1] + assert actual_tags1 == expected_tags1 + + # Test with numeric default + stream2 = SyncStreamFromLists(packets, tags) + default_tag2 = DefaultTag(42) + result2 = list(default_tag2(stream2)) + + expected_tags2 = [42, "existing", 42] + actual_tags2 = [tag for _, tag in result2] + assert actual_tags2 == expected_tags2 + + def test_default_tag_empty_string_vs_none(self): + """Test default tag distinguishes between empty string and None.""" + packets = ["data1", "data2", "data3"] + tags = [None, "", None] # Empty string vs None + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("default") + result_stream = default_tag(stream) + + result = list(result_stream) + + # Empty string should be preserved, None should be replaced + expected_tags = ["default", "", "default"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_default_tag_preserves_packets(self): + """Test that default tag preserves all packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = [None, None, "existing", None, None] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + expected_tags = ["default", "default", "existing", "default", "default"] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_default_tag_with_complex_default(self): + """Test default tag with complex default value.""" + packets = ["data1", "data2"] + tags = [None, "existing"] + + default_value = {"type": "default", "timestamp": 12345} + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag(default_value) + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = [default_value, "existing"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == 
expected_tags + assert actual_tags[0] is default_value # Should be the same object + + def test_default_tag_chaining(self, sample_packets): + """Test chaining multiple default tag operations.""" + tags = [None, "middle", None] + + stream = SyncStreamFromLists(sample_packets, tags) + + # First default tag + default_tag1 = DefaultTag("first_default") + stream1 = default_tag1(stream) + + # Create new stream with some None tags again + intermediate_result = list(stream1) + new_tags = [ + None if tag == "first_default" else tag for _, tag in intermediate_result + ] + new_packets = [packet for packet, _ in intermediate_result] + + stream2 = SyncStreamFromLists(new_packets, new_tags) + default_tag2 = DefaultTag("second_default") + stream3 = default_tag2(stream2) + + final_result = list(stream3) + + # The "middle" tag should be preserved + actual_tags = [tag for _, tag in final_result] + assert "middle" in actual_tags + assert "second_default" in actual_tags + + def test_default_tag_maintains_order(self): + """Test that default tag maintains packet order.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [None if i % 2 == 0 else f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("even_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + + # Check that even indices got default tags, odd indices kept original + for i in range(10): + if i % 2 == 0: + assert actual_tags[i] == "even_default" + else: + assert actual_tags[i] == f"tag_{i}" + + def test_default_tag_with_callable_default(self): + """Test default tag with callable default (if supported).""" + packets = ["data1", "data2", "data3"] + tags = [None, "existing", None] + + # Simple callable that returns a counter + class DefaultGenerator: + def __init__(self): + self.count = 0 + + def __call__(self): + self.count += 1 + return f"default_{self.count}" + + # If the implementation supports callable defaults + try: + default_gen = DefaultGenerator() + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag(default_gen) + result_stream = default_tag(stream) + + result = list(result_stream) + actual_tags = [tag for _, tag in result] + + # This would only work if DefaultTag supports callable defaults + # Otherwise this test should be skipped or modified + assert "existing" in actual_tags + except (TypeError, AttributeError): + # If callable defaults are not supported, that's fine + pass + + def test_default_tag_large_stream(self): + """Test default tag with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [None if i % 3 == 0 else f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("bulk_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 1000 + assert len(actual_tags) == 1000 + + # Check that every third tag was replaced + for i in range(1000): + if i % 3 == 0: + assert actual_tags[i] == "bulk_default" + else: + assert actual_tags[i] == f"tag_{i}" + + def test_default_tag_pickle(self): + """Test that DefaultTag mapper is pickleable.""" + import pickle + from orcabridge.mappers import DefaultTag + + default_tag = DefaultTag({"default": "test"}) + pickled = pickle.dumps(default_tag) +
unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, DefaultTag) + assert unpickled.default_tag == default_tag.default_tag diff --git a/tests/test_streams_operations/test_mappers/test_filter.py b/tests/test_streams_operations/test_mappers/test_filter.py new file mode 100644 index 0000000..b16049d --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_filter.py @@ -0,0 +1,325 @@ +"""Tests for Filter mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import Filter +from orcabridge.stream import SyncStreamFromLists + + +class TestFilter: + """Test cases for Filter mapper.""" + + def test_filter_basic(self, simple_predicate): + """Test basic filter functionality.""" + packets = [1, 2, 3, 4, 5, 6] + tags = ["odd", "even", "odd", "even", "odd", "even"] + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(simple_predicate) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Should keep only even numbers + expected_packets = [2, 4, 6] + expected_tags = ["even", "even", "even"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_none_match(self, sample_packets, sample_tags): + """Test filter when no packets match.""" + + def never_matches(packet, tag): + return False + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(never_matches) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + assert len(result) == 0 + + def test_filter_all_match(self, sample_packets, sample_tags): + """Test filter when all packets match.""" + + def always_matches(packet, tag): + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(always_matches) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_filter_empty_stream(self, simple_predicate): + """Test filter with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + filter_mapper = Filter(simple_predicate) + filtered_stream = filter_mapper(empty_stream) + + result = list(filtered_stream) + assert len(result) == 0 + + def test_filter_string_predicate(self): + """Test filter with string-based predicate.""" + packets = ["apple", "banana", "cherry", "date", "elderberry"] + tags = ["fruit1", "fruit2", "fruit3", "fruit4", "fruit5"] + + def starts_with_vowel(packet, tag): + return isinstance(packet, str) and packet[0].lower() in "aeiou" + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(starts_with_vowel) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = ["apple", "elderberry"] + expected_tags = ["fruit1", "fruit5"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_tag_based_predicate(self): + """Test filter using tag information.""" + packets = [10, 20, 30, 40, 50] + tags = ["small", "medium", "large", "huge", "enormous"] + + def tag_length_filter(packet, tag): + return len(tag) <= 5 + + stream = 
SyncStreamFromLists(packets, tags) + filter_mapper = Filter(tag_length_filter) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [10, 40] # "small" and "huge" have <= 5 chars + expected_tags = ["small", "huge"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_complex_predicate(self): + """Test filter with complex predicate.""" + packets = [ + {"value": 5, "type": "A", "active": True}, + {"value": 15, "type": "B", "active": False}, + {"value": 25, "type": "A", "active": True}, + {"value": 35, "type": "C", "active": True}, + {"value": 45, "type": "A", "active": False}, + ] + tags = ["item1", "item2", "item3", "item4", "item5"] + + def complex_predicate(packet, tag): + return ( + isinstance(packet, dict) + and packet.get("type") == "A" + and packet.get("active", False) + and packet.get("value", 0) > 10 + ) + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(complex_predicate) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Only the third item matches all conditions + expected_packets = [{"value": 25, "type": "A", "active": True}] + expected_tags = ["item3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_with_none_packets(self): + """Test filter with None packets.""" + packets = [None, "data", None, "more_data", None] + tags = ["empty1", "full1", "empty2", "full2", "empty3"] + + def not_none(packet, tag): + return packet is not None + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(not_none) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = ["data", "more_data"] + expected_tags = ["full1", "full2"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_preserves_packet_types(self): + """Test that filter preserves packet types.""" + packets = [PacketType("data1"), [1, 2, 3], {"key": "value"}, "string", 42] + tags = ["type1", "type2", "type3", "type4", "type5"] + + def is_container(packet, tag): + return isinstance(packet, (list, dict)) + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(is_container) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [[1, 2, 3], {"key": "value"}] + expected_tags = ["type2", "type3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + assert isinstance(actual_packets[0], list) + assert isinstance(actual_packets[1], dict) + + def test_filter_maintains_order(self): + """Test that filter maintains packet order.""" + packets = [f"packet_{i}" for i in range(20)] + tags = [f"tag_{i}" for i in range(20)] + + def keep_even_indices(packet, tag): + # Extract index from packet name + index = int(packet.split("_")[1]) + return index % 2 == 0 + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(keep_even_indices) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [f"packet_{i}" for i in 
range(0, 20, 2)] + expected_tags = [f"tag_{i}" for i in range(0, 20, 2)] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_predicate_exception(self, sample_packets, sample_tags): + """Test filter when predicate raises exception.""" + + def error_predicate(packet, tag): + if packet == sample_packets[1]: # Error on second packet + raise ValueError("Predicate error") + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(error_predicate) + filtered_stream = filter_mapper(stream) + + # Should propagate the exception + with pytest.raises(ValueError): + list(filtered_stream) + + def test_filter_with_lambda(self): + """Test filter with lambda predicate.""" + packets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + tags = [f"num_{i}" for i in packets] + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(lambda p, t: p % 3 == 0) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [3, 6, 9] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_chaining(self): + """Test chaining multiple filter operations.""" + packets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + tags = [f"num_{i}" for i in packets] + + stream = SyncStreamFromLists(packets, tags) + + # First filter: keep even numbers + filter1 = Filter(lambda p, t: p % 2 == 0) + stream1 = filter1(stream) + + # Second filter: keep numbers > 4 + filter2 = Filter(lambda p, t: p > 4) + stream2 = filter2(stream1) + + result = list(stream2) + + expected_packets = [6, 8, 10] # Even numbers > 4 + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_with_generator_stream(self): + """Test filter with generator-based stream.""" + + def packet_generator(): + for i in range(20): + yield i, f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + def is_prime(packet, tag): + if packet < 2: + return False + for i in range(2, int(packet**0.5) + 1): + if packet % i == 0: + return False + return True + + filter_mapper = Filter(is_prime) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Prime numbers under 20: 2, 3, 5, 7, 11, 13, 17, 19 + expected_packets = [2, 3, 5, 7, 11, 13, 17, 19] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_pickle(self): + """Test that Filter mapper is pickleable.""" + import pickle + from orcabridge.mappers import Filter + + def is_even(tag, packet): + return packet % 2 == 0 + + filter_mapper = Filter(is_even) + pickled = pickle.dumps(filter_mapper) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Filter) + assert unpickled.__class__.__name__ == "Filter" diff --git a/tests/test_streams_operations/test_mappers/test_first_match.py b/tests/test_streams_operations/test_mappers/test_first_match.py new file mode 100644 index 0000000..b282ebc --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_first_match.py @@ -0,0 +1,244 @@ +"""Tests for FirstMatch mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import FirstMatch +from orcabridge.stream import SyncStreamFromLists + + +class 
TestFirstMatch: + """Test cases for FirstMatch mapper.""" + + def test_first_match_basic(self, simple_predicate): + """Test basic first match functionality.""" + packets = [1, 2, 3, 4, 5] + tags = ["odd", "even", "odd", "even", "odd"] + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(simple_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should find the first packet that matches the predicate + assert len(result) == 1 + packet, tag = result[0] + assert packet == 2 # First even number + assert tag == "even" + + def test_first_match_no_match(self, sample_packets, sample_tags): + """Test first match when no packet matches.""" + + def never_matches(packet, tag): + return False + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(never_matches) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_first_match_all_match(self, sample_packets, sample_tags): + """Test first match when all packets match.""" + + def always_matches(packet, tag): + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(always_matches) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should return only the first packet + assert len(result) == 1 + packet, tag = result[0] + assert packet == sample_packets[0] + assert tag == sample_tags[0] + + def test_first_match_empty_stream(self, simple_predicate): + """Test first match with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + first_match = FirstMatch(simple_predicate) + result_stream = first_match(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_first_match_string_predicate(self): + """Test first match with string-based predicate.""" + packets = ["apple", "banana", "cherry", "date"] + tags = ["fruit1", "fruit2", "fruit3", "fruit4"] + + def starts_with_c(packet, tag): + return isinstance(packet, str) and packet.startswith("c") + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(starts_with_c) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "cherry" + assert tag == "fruit3" + + def test_first_match_tag_based_predicate(self): + """Test first match using tag information.""" + packets = [10, 20, 30, 40] + tags = ["small", "medium", "large", "huge"] + + def tag_contains_e(packet, tag): + return "e" in tag + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(tag_contains_e) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == 20 # "medium" contains 'e' + assert tag == "medium" + + def test_first_match_complex_predicate(self): + """Test first match with complex predicate.""" + packets = [ + {"value": 5, "type": "A"}, + {"value": 15, "type": "B"}, + {"value": 25, "type": "A"}, + {"value": 35, "type": "C"}, + ] + tags = ["item1", "item2", "item3", "item4"] + + def complex_predicate(packet, tag): + return ( + isinstance(packet, dict) + and packet.get("value", 0) > 10 + and packet.get("type") == "A" + ) + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(complex_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == {"value": 25, "type": "A"} + assert tag == 
"item3" + + def test_first_match_with_none_packets(self): + """Test first match with None packets.""" + packets = [None, "data", None, "more_data"] + tags = ["empty1", "full1", "empty2", "full2"] + + def not_none(packet, tag): + return packet is not None + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(not_none) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "data" + assert tag == "full1" + + def test_first_match_preserves_packet_types(self): + """Test that first match preserves packet types.""" + packets = [PacketType("data1"), [1, 2, 3], {"key": "value"}, 42] + tags = ["str", "list", "dict", "int"] + + def is_list(packet, tag): + return isinstance(packet, list) + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(is_list) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == [1, 2, 3] + assert tag == "list" + assert isinstance(packet, list) + + def test_first_match_predicate_exception(self, sample_packets, sample_tags): + """Test first match when predicate raises exception.""" + + def error_predicate(packet, tag): + if packet == sample_packets[1]: # Error on second packet + raise ValueError("Predicate error") + return packet == sample_packets[2] # Match third packet + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(error_predicate) + result_stream = first_match(stream) + + # The behavior here depends on implementation + # It might propagate the exception or skip the problematic packet + with pytest.raises(ValueError): + list(result_stream) + + def test_first_match_with_generator_stream(self): + """Test first match with generator-based stream.""" + + def packet_generator(): + for i in range(10): + yield f"packet_{i}", f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + def find_packet_5(packet, tag): + return packet == "packet_5" + + first_match = FirstMatch(find_packet_5) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "packet_5" + assert tag == "tag_5" + + def test_first_match_early_termination(self): + """Test that first match terminates early and doesn't process remaining packets.""" + processed_packets = [] + + def tracking_predicate(packet, tag): + processed_packets.append(packet) + return packet == "target" + + packets = ["a", "b", "target", "c", "d"] + tags = ["tag1", "tag2", "tag3", "tag4", "tag5"] + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(tracking_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should have found the target + assert len(result) == 1 + assert result[0][0] == "target" + + # Should have stopped processing after finding the target + assert processed_packets == ["a", "b", "target"] + + def test_first_match_pickle(self): + """Test that FirstMatch mapper is pickleable.""" + import pickle + from orcabridge.mappers import FirstMatch + + first_match = FirstMatch() + pickled = pickle.dumps(first_match) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, FirstMatch) + assert unpickled.__class__.__name__ == "FirstMatch" diff --git a/tests/test_streams_operations/test_mappers/test_group_by.py 
b/tests/test_streams_operations/test_mappers/test_group_by.py new file mode 100644 index 0000000..1594498 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_group_by.py @@ -0,0 +1,298 @@ +"""Tests for GroupBy mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import GroupBy +from orcabridge.streams import SyncStreamFromLists + + +class TestGroupBy: + """Test cases for GroupBy mapper.""" + + def test_group_by_basic(self): + """Test basic groupby functionality.""" + tags = [ + {"category": "A", "id": "1"}, + {"category": "B", "id": "2"}, + {"category": "A", "id": "3"}, + {"category": "B", "id": "4"}, + ] + packets = [ + {"value": "data/item1.txt", "name": "metadata/item1.json"}, + {"value": "data/item2.txt", "name": "metadata/item2.json"}, + {"value": "data/item3.txt", "name": "metadata/item3.json"}, + {"value": "data/item4.txt", "name": "metadata/item4.json"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have 2 groups (A and B) + assert len(results) == 2 + + # Check that all groups are present + categories_found = [] + for tag, _ in results: + categories_found.extend(tag["category"]) + categories = set(categories_found) + assert categories == {"A", "B"} + + # Check grouped data structure + # With reduce_keys=False (default), everything should be lists including group keys + for tag, packet in results: + if tag["category"] == ["A", "A"]: # Group key is also a list + assert tag["id"] == ["1", "3"] # IDs for category A + assert packet["value"] == [ + "data/item1.txt", + "data/item3.txt", + ] # Values for category A + assert packet["name"] == ["metadata/item1.json", "metadata/item3.json"] + elif tag["category"] == ["B", "B"]: # Group key is also a list + assert tag["id"] == ["2", "4"] # IDs for category B + assert packet["value"] == [ + "data/item2.txt", + "data/item4.txt", + ] # Values for category B + assert packet["name"] == ["metadata/item2.json", "metadata/item4.json"] + + def test_group_by_reduce_keys(self): + """Test groupby with reduce_keys=True.""" + tags = [ + {"category": "A", "id": "1", "extra": "x1"}, + {"category": "A", "id": "2", "extra": "x2"}, + {"category": "B", "id": "3", "extra": "x3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"], reduce_keys=True) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + for tag, packet in results: + if tag["category"] == "A": + # With reduce_keys=True, group keys become singular values + assert tag["category"] == "A" + # Non-group keys become lists + assert tag["id"] == ["1", "2"] + assert tag["extra"] == ["x1", "x2"] + elif tag["category"] == "B": + assert tag["category"] == "B" + assert tag["id"] == ["3"] + assert tag["extra"] == ["x3"] + + def test_group_by_no_group_keys(self): + """Test groupby without specifying group_keys (uses all tag keys).""" + tags = [ + {"category": "A", "id": "1"}, + {"category": "A", "id": "1"}, # Duplicate + {"category": "B", "id": "2"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy() # No group_keys specified + grouped_stream = group_by(stream) + + results = 
list(grouped_stream) + + # Should group by all tag keys (category, id) + assert len(results) == 2 # (A,1) and (B,2) + + # Extract group keys, accounting for lists in the results + group_keys = set() + for tag, _ in results: + # When reduce_keys=False, all values are lists + category_list = tag["category"] + id_list = tag["id"] + # Since this groups by exact matches, each group should have same values + # We'll take the first value from each list to represent the group + group_keys.add((category_list[0], id_list[0])) + assert group_keys == {("A", "1"), ("B", "2")} + + def test_group_by_with_selection_function(self): + """Test groupby with selection function.""" + tags = [ + {"category": "A", "priority": "1"}, + {"category": "A", "priority": "2"}, + {"category": "A", "priority": "3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + # Selection function that only keeps items with priority >= 2 + def select_high_priority(grouped_items): + return [int(tag["priority"]) >= 2 for tag, packet in grouped_items] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy( + group_keys=["category"], selection_function=select_high_priority + ) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + assert len(results) == 1 + tag, packet = results[0] + + # Should only have priority 2 and 3 items + assert tag["priority"] == ["2", "3"] + assert packet["value"] == ["data/item2.txt", "data/item3.txt"] + + def test_group_by_empty_stream(self): + """Test groupby with empty stream.""" + stream = SyncStreamFromLists( + tags=[], packets=[], tag_keys=["category", "id"], packet_keys=["value"] + ) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + assert len(results) == 0 + + def test_group_by_single_item(self): + """Test groupby with single item.""" + tags = [{"category": "A", "id": "1"}] + packets = [{"value": "data/item1.txt"}] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + assert len(results) == 1 + tag, packet = results[0] + assert tag["category"] == [ + "A" + ] # With reduce_keys=False, even single values become lists + assert tag["id"] == ["1"] + assert packet["value"] == ["data/item1.txt"] + + def test_group_by_missing_group_keys(self): + """Test groupby when some items don't have the group keys.""" + tags = [ + {"category": "A", "id": "1"}, + {"id": "2"}, # Missing category + {"category": "A", "id": "3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have 2 groups: category="A" and category=None + assert len(results) == 2 + + categories = set() + for tag, _ in results: + # When reduce_keys=False, all values are lists + category_list = tag.get("category", [None]) + if category_list and category_list != [None]: + categories.add(category_list[0]) + else: + categories.add(None) + assert categories == {"A", None} + + def test_group_by_selection_function_filters_all(self): + """Test groupby where selection function filters out all items.""" + tags = [ + {"category": "A", "priority": "1"}, + {"category": "A", "priority": "2"}, + ] + packets = [ + 
{"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + ] + + # Selection function that filters out everything + def select_none(grouped_items): + return [False] * len(grouped_items) + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"], selection_function=select_none) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have no results since everything was filtered out + assert len(results) == 0 + + def test_group_by_multiple_streams_error(self): + """Test that GroupBy raises error with multiple streams.""" + stream1 = SyncStreamFromLists(tags=[{"a": "1"}], packets=[{"b": "file.txt"}]) + stream2 = SyncStreamFromLists(tags=[{"c": "3"}], packets=[{"d": "file2.txt"}]) + + group_by = GroupBy(group_keys=["a"]) + + with pytest.raises(ValueError, match="exactly one stream"): + list(group_by(stream1, stream2)) + + def test_group_by_pickle(self): + """Test that GroupBy mapper is pickleable.""" + # Test basic GroupBy + group_by = GroupBy(group_keys=["category"]) + pickled = pickle.dumps(group_by) + unpickled = pickle.loads(pickled) + + assert unpickled.group_keys == group_by.group_keys + assert unpickled.reduce_keys == group_by.reduce_keys + assert unpickled.selection_function == group_by.selection_function + + # Test with reduce_keys + group_by_reduce = GroupBy(group_keys=["category"], reduce_keys=True) + pickled_reduce = pickle.dumps(group_by_reduce) + unpickled_reduce = pickle.loads(pickled_reduce) + + assert unpickled_reduce.group_keys == group_by_reduce.group_keys + assert unpickled_reduce.reduce_keys == group_by_reduce.reduce_keys + + def test_group_by_identity_structure(self): + """Test GroupBy identity_structure method.""" + stream = SyncStreamFromLists(tags=[{"a": "1"}], packets=[{"b": "file.txt"}]) + + # Test without selection function + group_by1 = GroupBy(group_keys=["category"]) + structure1 = group_by1.identity_structure(stream) + assert structure1[0] == "GroupBy" + assert structure1[1] == ["category"] + assert not structure1[2] # reduce_keys + + # Test with reduce_keys + group_by2 = GroupBy(group_keys=["category"], reduce_keys=True) + structure2 = group_by2.identity_structure(stream) + assert structure2[2] # reduce_keys + + # Different group_keys should have different structures + group_by3 = GroupBy(group_keys=["other"]) + structure3 = group_by3.identity_structure(stream) + assert structure1 != structure3 + + def test_group_by_repr(self): + """Test GroupBy string representation.""" + group_by = GroupBy(group_keys=["category"], reduce_keys=True) + repr_str = repr(group_by) + # Should contain class name and key parameters + assert "GroupBy" in repr_str diff --git a/tests/test_streams_operations/test_mappers/test_join.py b/tests/test_streams_operations/test_mappers/test_join.py new file mode 100644 index 0000000..7b60571 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_join.py @@ -0,0 +1,198 @@ +"""Tests for Join mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import Join +from orcabridge.streams import SyncStreamFromLists + + +class TestJoin: + """Test cases for Join mapper.""" + + def test_join_basic(self, sample_packets, sample_tags): + """Test basic join functionality.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + join = Join() + joined_stream = join(stream) + + # Join should collect all packets into a single packet + packets = list(joined_stream) + + assert len(packets) == 1 + joined_packet, joined_tag = packets[0] + + # 
The joined packet should contain all original packets + assert len(joined_packet) == len(sample_packets) + assert list(joined_packet) == sample_packets + + def test_join_empty_stream(self): + """Test join with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + join = Join() + joined_stream = join(empty_stream) + + packets = list(joined_stream) + + assert len(packets) == 1 + joined_packet, _ = packets[0] + assert len(joined_packet) == 0 + assert list(joined_packet) == [] + + def test_join_single_packet(self): + """Test join with single packet stream.""" + packets = ["single_packet"] + tags = ["single_tag"] + stream = SyncStreamFromLists(packets, tags) + + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, joined_tag = result[0] + assert len(joined_packet) == 1 + assert list(joined_packet) == ["single_packet"] + + def test_join_preserves_packet_types(self): + """Test that join preserves different packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 5 + + joined_list = list(joined_packet) + assert joined_list[0] == PacketType("data1") + assert joined_list[1] == {"key": "value"} + assert joined_list[2] == [1, 2, 3] + assert joined_list[3] == 42 + assert joined_list[4] == "string" + + def test_join_maintains_order(self): + """Test that join maintains packet order.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + assert list(joined_packet) == packets + + def test_join_tag_handling(self, sample_packets, sample_tags): + """Test how join handles tags.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + _, joined_tag = result[0] + + # The joined tag should be a collection of original tags + # (implementation-specific behavior) + assert joined_tag is not None + + def test_join_large_stream(self): + """Test join with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 1000 + assert list(joined_packet) == packets + + def test_join_nested_structures(self): + """Test join with nested data structures.""" + packets = [{"nested": {"data": 1}}, [1, [2, 3], 4], ((1, 2), (3, 4))] + tags = ["dict", "list", "tuple"] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + joined_list = list(joined_packet) + assert joined_list[0] == {"nested": {"data": 1}} + assert joined_list[1] == [1, [2, 3], 4] + assert joined_list[2] == ((1, 2), (3, 4)) + + def test_join_with_none_packets(self): + """Test join with None packets.""" + packets = ["data1", None, "data2", None] + tags = ["tag1", "tag2", "tag3", "tag4"] + + stream = SyncStreamFromLists(packets, tags) + join 
= Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + joined_list = list(joined_packet) + assert joined_list == ["data1", None, "data2", None] + + def test_join_chaining(self, sample_packets, sample_tags): + """Test chaining join operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First join + join1 = Join() + joined_stream1 = join1(stream) + + # Second join (should join the already joined result) + join2 = Join() + joined_stream2 = join2(joined_stream1) + + result = list(joined_stream2) + assert len(result) == 1 + + # The result should be a packet containing one element (the previous join result) + final_packet, _ = result[0] + assert len(final_packet) == 1 + + def test_join_memory_efficiency(self): + """Test that join doesn't consume excessive memory for large streams.""" + # This is more of a performance test, but we can check basic functionality + packets = [f"packet_{i}" for i in range(10000)] + tags = [f"tag_{i}" for i in range(10000)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + # Just verify it completes without issues + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 10000 + + def test_join_pickle(self): + """Test that Join mapper is pickleable.""" + join = Join() + pickled = pickle.dumps(join) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Join) + assert unpickled.__class__.__name__ == "Join" diff --git a/tests/test_streams_operations/test_mappers/test_map_packets.py b/tests/test_streams_operations/test_mappers/test_map_packets.py new file mode 100644 index 0000000..da278de --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_map_packets.py @@ -0,0 +1,273 @@ +"""Tests for MapPackets mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import MapPackets +from orcabridge.stream import SyncStreamFromLists + + +class TestMapPackets: + """Test cases for MapPackets mapper.""" + + def test_map_packets_basic(self, sample_packets, sample_tags): + """Test basic map packets functionality.""" + + def add_suffix(packet): + return f"{packet}_mapped" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(add_suffix) + mapped_stream = map_packets(stream) + + result_packets = [] + result_tags = [] + for packet, tag in mapped_stream: + result_packets.append(packet) + result_tags.append(tag) + + # Packets should be transformed, tags unchanged + expected_packets = [f"{p}_mapped" for p in sample_packets] + assert result_packets == expected_packets + assert result_tags == sample_tags + + def test_map_packets_numeric_transformation(self): + """Test map packets with numeric transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + def square(packet): + return packet**2 + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(square) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [1, 4, 9, 16, 25] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == tags + + def test_map_packets_type_conversion(self): + """Test map packets with type conversion.""" + packets = ["1", "2", "3", "4"] + tags = ["str1", "str2", "str3", 
"str4"] + + def str_to_int(packet): + return int(packet) + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(str_to_int) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [1, 2, 3, 4] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + assert all(isinstance(p, int) for p in actual_packets) + + def test_map_packets_complex_transformation(self): + """Test map packets with complex data transformation.""" + packets = [ + {"name": "alice", "age": 25}, + {"name": "bob", "age": 30}, + {"name": "charlie", "age": 35}, + ] + tags = ["person1", "person2", "person3"] + + def create_description(packet): + return f"{packet['name']} is {packet['age']} years old" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(create_description) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [ + "alice is 25 years old", + "bob is 30 years old", + "charlie is 35 years old", + ] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_identity_function(self, sample_packets, sample_tags): + """Test map packets with identity function.""" + + def identity(packet): + return packet + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(identity) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_map_packets_empty_stream(self): + """Test map packets with empty stream.""" + + def dummy_transform(packet): + return packet * 2 + + empty_stream = SyncStreamFromLists([], []) + map_packets = MapPackets(dummy_transform) + mapped_stream = map_packets(empty_stream) + + result = list(mapped_stream) + assert len(result) == 0 + + def test_map_packets_with_none_values(self): + """Test map packets with None values.""" + packets = [1, None, 3, None, 5] + tags = ["num1", "null1", "num3", "null2", "num5"] + + def handle_none(packet): + return 0 if packet is None else packet * 2 + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(handle_none) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [2, 0, 6, 0, 10] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_exception_handling(self): + """Test map packets when transformation function raises exception.""" + packets = [1, 2, "invalid", 4] + tags = ["num1", "num2", "str1", "num4"] + + def divide_by_packet(packet): + return 10 / packet # Will fail on "invalid" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(divide_by_packet) + mapped_stream = map_packets(stream) + + # Should raise exception when processing "invalid" + with pytest.raises(TypeError): + list(mapped_stream) + + def test_map_packets_preserves_order(self): + """Test that map packets preserves packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def add_prefix(packet): + return f"mapped_{packet}" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(add_prefix) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [f"mapped_packet_{i}" for i in range(100)] + actual_packets 
= [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_with_lambda(self, sample_packets, sample_tags): + """Test map packets with lambda function.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(lambda x: f"λ({x})") + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [f"λ({p})" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_chaining(self, sample_packets, sample_tags): + """Test chaining multiple map packets operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First transformation + map1 = MapPackets(lambda x: f"first_{x}") + stream1 = map1(stream) + + # Second transformation + map2 = MapPackets(lambda x: f"second_{x}") + stream2 = map2(stream1) + + result = list(stream2) + + expected_packets = [f"second_first_{p}" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_with_packet_type(self): + """Test map packets with PacketType objects.""" + packets = [PacketType("data1"), PacketType("data2")] + tags = ["type1", "type2"] + + def extract_data(packet): + return packet.data if hasattr(packet, "data") else str(packet) + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(extract_data) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + actual_packets = [packet for packet, _ in result] + + # Should extract string representation or data + assert len(actual_packets) == 2 + assert all(isinstance(p, str) for p in actual_packets) + + def test_map_packets_stateful_transformation(self): + """Test map packets with stateful transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + + class Counter: + def __init__(self): + self.count = 0 + + def transform(self, packet): + self.count += 1 + return (packet, self.count) + + counter = Counter() + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(counter.transform) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_pickle(self): + """Test that MapPackets mapper is pickleable.""" + import pickle + from orcabridge.mappers import MapPackets + + # MapPackets takes a key mapping, not a transformation function + key_map = {"old_key": "new_key", "data": "value"} + map_packets = MapPackets(key_map) + pickled = pickle.dumps(map_packets) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, MapPackets) + assert unpickled.key_map == map_packets.key_map diff --git a/tests/test_streams_operations/test_mappers/test_map_tags.py b/tests/test_streams_operations/test_mappers/test_map_tags.py new file mode 100644 index 0000000..a8e185a --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_map_tags.py @@ -0,0 +1,330 @@ +"""Tests for MapTags mapper functionality.""" + + import pytest + from orcabridge.base import PacketType + from orcabridge.mapper import MapTags + from orcabridge.stream import SyncStreamFromLists + + + class TestMapTags: + """Test cases for MapTags mapper.""" + + def test_map_tags_basic(self, sample_packets, sample_tags): + """Test basic map tags 
functionality.""" + + def add_prefix(tag): + return f"mapped_{tag}" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(add_prefix) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"mapped_{t}" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + # Packets should be unchanged, tags transformed + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_map_tags_type_conversion(self, sample_packets): + """Test map tags with type conversion.""" + tags = ["1", "2", "3"] + + def str_to_int(tag): + return int(tag) + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(str_to_int) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [1, 2, 3] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + assert all(isinstance(t, int) for t in actual_tags) + + def test_map_tags_complex_transformation(self): + """Test map tags with complex transformation.""" + packets = ["data1", "data2", "data3"] + tags = [ + {"type": "string", "length": 5}, + {"type": "string", "length": 5}, + {"type": "string", "length": 5}, + ] + + def extract_type(tag): + if isinstance(tag, dict): + return tag.get("type", "unknown") + return str(tag) + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(extract_type) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["string", "string", "string"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_identity_function(self, sample_packets, sample_tags): + """Test map tags with identity function.""" + + def identity(tag): + return tag + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(identity) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_map_tags_empty_stream(self): + """Test map tags with empty stream.""" + + def dummy_transform(tag): + return f"transformed_{tag}" + + empty_stream = SyncStreamFromLists([], []) + map_tags = MapTags(dummy_transform) + mapped_stream = map_tags(empty_stream) + + result = list(mapped_stream) + assert len(result) == 0 + + def test_map_tags_with_none_values(self, sample_packets): + """Test map tags with None values.""" + tags = ["tag1", None, "tag3"] + + def handle_none(tag): + return "NULL_TAG" if tag is None else tag.upper() + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(handle_none) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["TAG1", "NULL_TAG", "TAG3"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_exception_handling(self, sample_packets): + """Test map tags when transformation function raises exception.""" + tags = ["valid", "also_valid", 123] # 123 will cause error in upper() + + def to_upper(tag): + return tag.upper() # Will fail on integer + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(to_upper) + mapped_stream = map_tags(stream) + + # Should raise exception when processing integer tag + with pytest.raises(AttributeError): + list(mapped_stream) + + def test_map_tags_preserves_packets(self): + 
"""Test that map tags preserves all packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + def add_suffix(tag): + return f"{tag}_processed" + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(add_suffix) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + expected_tags = [f"{t}_processed" for t in tags] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_map_tags_maintains_order(self): + """Test that map tags maintains packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def reverse_tag(tag): + return tag[::-1] # Reverse the string + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(reverse_tag) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"{i}_gat" for i in range(100)] # "tag_i" reversed + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_map_tags_with_lambda(self, sample_packets, sample_tags): + """Test map tags with lambda function.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(lambda t: f"λ({t})") + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"λ({t})" for t in sample_tags] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_chaining(self, sample_packets, sample_tags): + """Test chaining multiple map tags operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First transformation + map1 = MapTags(lambda t: f"first_{t}") + stream1 = map1(stream) + + # Second transformation + map2 = MapTags(lambda t: f"second_{t}") + stream2 = map2(stream1) + + result = list(stream2) + + expected_tags = [f"second_first_{t}" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_map_tags_stateful_transformation(self): + """Test map tags with stateful transformation.""" + packets = ["a", "b", "c", "d", "e"] + tags = ["tag1", "tag2", "tag3", "tag4", "tag5"] + + class TagCounter: + def __init__(self): + self.count = 0 + + def transform(self, tag): + self.count += 1 + return f"{tag}_#{self.count}" + + counter = TagCounter() + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(counter.transform) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["tag1_#1", "tag2_#2", "tag3_#3", "tag4_#4", "tag5_#5"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_with_complex_types(self): + """Test map tags with complex tag types.""" + packets = ["data1", "data2", "data3"] + tags = [ + {"id": 1, "category": "A"}, + {"id": 2, "category": "B"}, + {"id": 3, "category": "A"}, + ] + + def extract_category(tag): + if isinstance(tag, dict): + return f"cat_{tag.get('category', 'unknown')}" + return str(tag) + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(extract_category) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["cat_A", "cat_B", "cat_A"] + 
actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_preserves_tag_references(self): + """Test that map tags doesn't break tag references when not needed.""" + packets = ["data1", "data2"] + shared_tag = {"shared": "reference"} + tags = [shared_tag, shared_tag] + + def conditional_transform(tag): + # Only transform if it's a string + if isinstance(tag, str): + return f"transformed_{tag}" + return tag # Keep dict unchanged + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(conditional_transform) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_tags = [tag for _, tag in result] + + # Both tags should still reference the same object + assert actual_tags[0] is shared_tag + assert actual_tags[1] is shared_tag + assert actual_tags[0] is actual_tags[1] + + def test_map_tags_large_stream(self): + """Test map tags with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + def add_hash(tag): + return f"{tag}_{hash(tag) % 1000}" + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(add_hash) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 1000 + assert len(actual_tags) == 1000 + assert actual_packets == packets + + # All tags should have been transformed + assert all( + "_" in tag and tag != f"tag_{i}" for i, tag in enumerate(actual_tags) + ) + + def test_map_tags_pickle(self): + """Test that MapTags mapper is pickleable.""" + import pickle + from orcabridge.mappers import MapTags + + # MapTags takes a key mapping, not a transformation function + key_map = {"old_tag": "new_tag", "category": "type"} + map_tags = MapTags(key_map) + pickled = pickle.dumps(map_tags) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, MapTags) + assert unpickled.key_map == map_tags.key_map diff --git a/tests/test_streams_operations/test_mappers/test_merge.py b/tests/test_streams_operations/test_mappers/test_merge.py new file mode 100644 index 0000000..fb4c655 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_merge.py @@ -0,0 +1,208 @@ +"""Tests for Merge mapper functionality.""" + +import pickle +import pytest +from orcabridge.base import PacketType +from orcabridge.mappers import Merge +from orcabridge.streams import SyncStreamFromLists + + +class TestMerge: + """Test cases for Merge mapper.""" + + def test_merge_two_streams(self, sample_packets, sample_tags): + """Test merging two streams.""" + # Create two streams + stream1 = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2 = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + # Should contain all packets from both streams + assert len(packets) == 3 + assert set(packets) == set(sample_packets) + assert set(tags) == set(sample_tags) + + def test_merge_multiple_streams(self, sample_packets, sample_tags): + """Test merging multiple streams.""" + # Create three streams with one packet each + streams = [] + for i in range(3): + stream = SyncStreamFromLists([sample_packets[i]], [sample_tags[i]]) + streams.append(stream) + + merge = Merge() + merged_stream = merge(*streams) + + packets 
= [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + assert len(packets) == 3 + assert set(packets) == set(sample_packets) + assert set(tags) == set(sample_tags) + + def test_merge_empty_streams(self): + """Test merging with empty streams.""" + empty1 = SyncStreamFromLists([], []) + empty2 = SyncStreamFromLists([], []) + + merge = Merge() + merged_stream = merge(empty1, empty2) + + packets = list(merged_stream) + assert len(packets) == 0 + + def test_merge_one_empty_one_full(self, sample_stream): + """Test merging empty stream with full stream.""" + empty_stream = SyncStreamFromLists([], []) + + merge = Merge() + merged_stream = merge(sample_stream, empty_stream) + + packets = list(merged_stream) + original_packets = list(sample_stream) + + assert len(packets) == len(original_packets) + # Order might be different, so check sets + assert set(packets) == set(original_packets) + + def test_merge_different_lengths(self): + """Test merging streams of different lengths.""" + packets1 = ["a", "b"] + tags1 = ["tag1", "tag2"] + packets2 = ["c", "d", "e", "f"] + tags2 = ["tag3", "tag4", "tag5", "tag6"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + assert len(packets) == 6 + assert set(packets) == set(packets1 + packets2) + assert set(tags) == set(tags1 + tags2) + + def test_merge_single_stream(self, sample_stream): + """Test merge with single stream.""" + merge = Merge() + merged_stream = merge(sample_stream) + + packets = list(merged_stream) + original_packets = list(sample_stream) + + assert packets == original_packets + + def test_merge_preserves_packet_types(self): + """Test that merge preserves different packet types.""" + packets1 = [PacketType("data1"), {"key1": "value1"}] + tags1 = ["str1", "dict1"] + packets2 = [[1, 2], 42] + tags2 = ["list1", "int1"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + result_packets = [] + for packet, _ in merged_stream: + result_packets.append(packet) + + assert len(result_packets) == 4 + assert set(result_packets) == set(packets1 + packets2) + + def test_merge_order_independence(self, sample_packets, sample_tags): + """Test that merge order doesn't affect final result set.""" + stream1 = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2 = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + + merge = Merge() + + # Merge in one order + merged1 = merge(stream1, stream2) + packets1 = set(p for p, _ in merged1) + + # Merge in reverse order (need to recreate streams) + stream1_new = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2_new = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + merged2 = merge(stream2_new, stream1_new) + packets2 = set(p for p, _ in merged2) + + assert packets1 == packets2 + + def test_merge_with_duplicate_packets(self): + """Test merging streams with duplicate packets.""" + packets1 = ["a", "b"] + tags1 = ["tag1", "tag2"] + packets2 = ["a", "c"] # "a" appears in both streams + tags2 = ["tag3", "tag4"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + for packet, _ in 
merged_stream: + packets.append(packet) + + # Should include duplicates + assert len(packets) == 4 + assert packets.count("a") == 2 + assert "b" in packets + assert "c" in packets + + def test_merge_no_streams_error(self): + """Test that merge with no streams raises an error.""" + merge = Merge() + + with pytest.raises(TypeError): + merge() + + def test_merge_large_number_of_streams(self): + """Test merging a large number of streams.""" + streams = [] + all_packets = [] + + for i in range(10): + packets = [f"packet_{i}"] + tags = [f"tag_{i}"] + streams.append(SyncStreamFromLists(packets, tags)) + all_packets.extend(packets) + + merge = Merge() + merged_stream = merge(*streams) + + result_packets = [] + for packet, _ in merged_stream: + result_packets.append(packet) + + assert len(result_packets) == 10 + assert set(result_packets) == set(all_packets) def test_merge_pickle(self): + """Test that Merge mapper is pickleable.""" + merge = Merge() + pickled = pickle.dumps(merge) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Merge) + assert unpickled.__class__.__name__ == "Merge" diff --git a/tests/test_streams_operations/test_mappers/test_repeat.py b/tests/test_streams_operations/test_mappers/test_repeat.py new file mode 100644 index 0000000..b8a4a98 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_repeat.py @@ -0,0 +1,186 @@ +"""Tests for Repeat mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import Repeat + + +class TestRepeat: + """Test cases for Repeat mapper.""" + + def test_repeat_basic(self, sample_stream): + """Test basic repeat functionality.""" + repeat = Repeat(3) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + + # Should have 3 times the original packets + assert len(packets) == 9 # 3 original * 3 repeats + + # Check that each packet appears 3 times consecutively + original_packets = list(sample_stream) + expected_packets = [] + for packet in original_packets: + expected_packets.extend([packet] * 3) + + assert packets == expected_packets + + def test_repeat_zero(self, sample_stream): + """Test repeat with count 0.""" + repeat = Repeat(0) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + assert len(packets) == 0 + + def test_repeat_one(self, sample_stream): + """Test repeat with count 1.""" + repeat = Repeat(1) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + original_packets = list(sample_stream) + + assert packets == original_packets + + def test_repeat_with_tags(self, sample_packets, sample_tags): + """Test repeat preserves tags correctly.""" + from orcabridge.streams import SyncStreamFromLists + + stream = SyncStreamFromLists(tags=sample_tags, packets=sample_packets) + repeat = Repeat(2) + repeated_stream = repeat(stream) + + packets = [] + tags = [] + for tag, packet in repeated_stream: + packets.append(packet) + tags.append(tag) + + # Each packet should appear twice with its corresponding tag + assert len(packets) == 6 # 3 original * 2 repeats + assert len(tags) == 6 + + # Check pattern: [p1,p1,p2,p2,p3,p3] with [t1,t1,t2,t2,t3,t3] + expected_packets = [] + expected_tags = [] + for p, t in zip(sample_packets, sample_tags): + expected_packets.extend([p, p]) + expected_tags.extend([t, t]) + + assert packets == expected_packets + assert tags == expected_tags + + def test_repeat_with_empty_stream(self): + """Test repeat with empty stream.""" + from orcabridge.streams 
import SyncStreamFromLists
+
+        empty_stream = SyncStreamFromLists(tags=[], packets=[])
+        repeat = Repeat(5)
+        repeated_stream = repeat(empty_stream)
+
+        packets = list(repeated_stream)
+        assert len(packets) == 0
+
+    def test_repeat_large_count(self, sample_stream):
+        """Test repeat with large count."""
+        repeat = Repeat(100)
+        repeated_stream = repeat(sample_stream)
+
+        packets = list(repeated_stream)
+        assert len(packets) == 300  # 3 original * 100 repeats
+
+    def test_repeat_negative_count(self):
+        """Test repeat with negative count raises error."""
+        with pytest.raises(ValueError):
+            Repeat(-1)
+
+    def test_repeat_non_integer_count(self):
+        """Test repeat with non-integer count."""
+        with pytest.raises(TypeError):
+            Repeat(3.5)
+
+        with pytest.raises(TypeError):
+            Repeat("3")
+
+    def test_repeat_preserves_packet_types(self, sample_stream):
+        """Test that repeat preserves different packet types."""
+        # Create stream with mixed packet types
+        from orcabridge.streams import SyncStreamFromLists
+
+        packets = [
+            {"data": "data1"},
+            {"key": "value"},
+            {"items": ["a", "b", "c"]},
+            {"number": "42"},
+        ]
+        tags = [{"type": "str"}, {"type": "dict"}, {"type": "list"}, {"type": "int"}]
+
+        stream = SyncStreamFromLists(tags=tags, packets=packets)
+        repeat = Repeat(2)
+        repeated_stream = repeat(stream)
+
+        result_packets = []
+        for tag, packet in repeated_stream:
+            result_packets.append(packet)
+
+        expected = [
+            {"data": "data1"},
+            {"data": "data1"},
+            {"key": "value"},
+            {"key": "value"},
+            {"items": ["a", "b", "c"]},
+            {"items": ["a", "b", "c"]},
+            {"number": "42"},
+            {"number": "42"},
+        ]
+
+        assert result_packets == expected
+
+    def test_repeat_chaining(self, sample_stream):
+        """Test chaining multiple repeat operations."""
+        repeat1 = Repeat(2)
+        repeat2 = Repeat(3)
+
+        # Apply first repeat
+        stream1 = repeat1(sample_stream)
+        # Apply second repeat
+        stream2 = repeat2(stream1)
+
+        packets = list(stream2)
+
+        # Should have 3 original * 2 * 3 = 18 packets
+        assert len(packets) == 18
+
+        # Each original packet should appear 6 times consecutively
+        original_packets = list(sample_stream)
+        expected = []
+        for packet in original_packets:
+            expected.extend([packet] * 6)
+
+        assert packets == expected
+
+    def test_repeat_pickle(self):
+        """Test that Repeat mapper is pickleable."""
+        repeat = Repeat(5)
+
+        # Test pickle/unpickle
+        pickled = pickle.dumps(repeat)
+        unpickled = pickle.loads(pickled)
+
+        # Verify the unpickled mapper has the same properties
+        assert unpickled.repeat_count == repeat.repeat_count
+
+        # Test that the unpickled mapper works correctly
+        from orcabridge.streams import SyncStreamFromLists
+
+        tags = [{"id": "1"}, {"id": "2"}]
+        packets = [{"data": "file1.txt"}, {"data": "file2.txt"}]
+        stream = SyncStreamFromLists(tags=tags, packets=packets)
+
+        original_results = list(repeat(stream))
+        unpickled_results = list(unpickled(stream))
+
+        assert original_results == unpickled_results
+        assert len(original_results) == 10  # 2 * 5 repeats
diff --git a/tests/test_streams_operations/test_mappers/test_transform.py b/tests/test_streams_operations/test_mappers/test_transform.py
new file mode 100644
index 0000000..495081e
--- /dev/null
+++ b/tests/test_streams_operations/test_mappers/test_transform.py
@@ -0,0 +1,364 @@
+"""Tests for Transform mapper functionality."""
+
+import pytest
+from orcabridge.base import PacketType
+from orcabridge.mappers import Transform
+from orcabridge.streams import SyncStreamFromLists
+
+
+class TestTransform:
+    """Test cases for Transform mapper."""
+
+    def
test_transform_basic(self, simple_transform): + """Test basic transform functionality.""" + packets = ["hello", "world", "test"] + tags = ["greeting", "noun", "action"] + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(simple_transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = ["HELLO", "WORLD", "TEST"] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == tags # Tags should be preserved + + def test_transform_with_tag_modification(self): + """Test transform that modifies both packet and tag.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + def double_and_prefix_tag(packet, tag): + return packet * 2, f"doubled_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(double_and_prefix_tag) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, 4, 6, 8, 10] + expected_tags = [ + "doubled_num1", + "doubled_num2", + "doubled_num3", + "doubled_num4", + "doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_packet_only(self, sample_packets, sample_tags): + """Test transform that only modifies packets.""" + + def add_prefix(packet, tag): + return f"transformed_{packet}", tag + + stream = SyncStreamFromLists(sample_packets, sample_tags) + transform_mapper = Transform(add_prefix) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [f"transformed_{p}" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == sample_tags + + def test_transform_tag_only(self, sample_packets, sample_tags): + """Test transform that only modifies tags.""" + + def add_tag_suffix(packet, tag): + return packet, f"{tag}_processed" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + transform_mapper = Transform(add_tag_suffix) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_tags = [f"{t}_processed" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_transform_empty_stream(self): + """Test transform with empty stream.""" + + def dummy_transform(packet, tag): + return packet, tag + + empty_stream = SyncStreamFromLists([], []) + transform_mapper = Transform(dummy_transform) + transformed_stream = transform_mapper(empty_stream) + + result = list(transformed_stream) + assert len(result) == 0 + + def test_transform_type_conversion(self): + """Test transform with type conversion.""" + packets = ["1", "2", "3", "4", "5"] + tags = ["str1", "str2", "str3", "str4", "str5"] + + def str_to_int_with_tag(packet, tag): + return int(packet), f"int_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(str_to_int_with_tag) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [1, 2, 3, 4, 5] + expected_tags = ["int_str1", "int_str2", "int_str3", "int_str4", 
"int_str5"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + assert all(isinstance(p, int) for p in actual_packets) + + def test_transform_complex_data(self): + """Test transform with complex data structures.""" + packets = [ + {"name": "alice", "age": 25}, + {"name": "bob", "age": 30}, + {"name": "charlie", "age": 35}, + ] + tags = ["person1", "person2", "person3"] + + def enrich_person_data(packet, tag): + enriched = packet.copy() + enriched["category"] = "adult" if packet["age"] >= 30 else "young" + return enriched, f"enriched_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(enrich_person_data) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [ + {"name": "alice", "age": 25, "category": "young"}, + {"name": "bob", "age": 30, "category": "adult"}, + {"name": "charlie", "age": 35, "category": "adult"}, + ] + expected_tags = ["enriched_person1", "enriched_person2", "enriched_person3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_with_none_values(self): + """Test transform with None values.""" + packets = [1, None, 3, None, 5] + tags = ["num1", "null1", "num3", "null2", "num5"] + + def handle_none_transform(packet, tag): + if packet is None: + return "MISSING", f"missing_{tag}" + else: + return packet * 2, f"doubled_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(handle_none_transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, "MISSING", 6, "MISSING", 10] + expected_tags = [ + "doubled_num1", + "missing_null1", + "doubled_num3", + "missing_null2", + "doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_preserves_order(self): + """Test that transform preserves packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def add_index(packet, tag): + index = int(packet.split("_")[1]) + return f"indexed_{index}_{packet}", f"indexed_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(add_index) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [f"indexed_{i}_packet_{i}" for i in range(100)] + expected_tags = [f"indexed_tag_{i}" for i in range(100)] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_exception_handling(self): + """Test transform when transformation function raises exception.""" + packets = [1, 2, "invalid", 4] + tags = ["num1", "num2", "str1", "num4"] + + def divide_transform(packet, tag): + return 10 / packet, f"divided_{tag}" # Will fail on "invalid" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(divide_transform) + transformed_stream = transform_mapper(stream) + + # Should raise exception when processing "invalid" + with pytest.raises(TypeError): + list(transformed_stream) + + def 
test_transform_with_lambda(self): + """Test transform with lambda function.""" + packets = [1, 2, 3, 4, 5] + tags = ["a", "b", "c", "d", "e"] + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(lambda p, t: (p**2, t.upper())) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [1, 4, 9, 16, 25] + expected_tags = ["A", "B", "C", "D", "E"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_chaining(self): + """Test chaining multiple transform operations.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + stream = SyncStreamFromLists(packets, tags) + + # First transformation: double the packet + transform1 = Transform(lambda p, t: (p * 2, f"doubled_{t}")) + stream1 = transform1(stream) + + # Second transformation: add 10 to packet + transform2 = Transform(lambda p, t: (p + 10, f"added_{t}")) + stream2 = transform2(stream1) + + result = list(stream2) + + expected_packets = [12, 14, 16, 18, 20] # (original * 2) + 10 + expected_tags = [ + "added_doubled_num1", + "added_doubled_num2", + "added_doubled_num3", + "added_doubled_num4", + "added_doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_with_packet_type(self): + """Test transform with PacketType objects.""" + packets = [PacketType("data1"), PacketType("data2")] + tags = ["type1", "type2"] + + def extract_and_modify(packet, tag): + data = str(packet) # Convert to string + return f"extracted_{data}", f"processed_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(extract_and_modify) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 2 + assert all("extracted_" in p for p in actual_packets) + assert actual_tags == ["processed_type1", "processed_type2"] + + def test_transform_stateful(self): + """Test transform with stateful transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + + class StatefulTransform: + def __init__(self): + self.counter = 0 + + def transform(self, packet, tag): + self.counter += 1 + return (packet + self.counter, f"{tag}_step_{self.counter}") + + stateful = StatefulTransform() + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(stateful.transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, 4, 6, 8, 10] # packet + step_number + expected_tags = [ + "n1_step_1", + "n2_step_2", + "n3_step_3", + "n4_step_4", + "n5_step_5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_pickle(self): + """Test that Transform mapper is pickleable.""" + import pickle + from orcabridge.mappers import Transform + + def add_prefix(tag, packet): + new_tag = {**tag, "prefix": "test"} + new_packet = {**packet, "processed": True} + return new_tag, new_packet + + transform = Transform(add_prefix) + pickled = pickle.dumps(transform) + 
unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Transform) + assert unpickled.__class__.__name__ == "Transform" diff --git a/tests/test_streams_operations/test_mappers/test_utility_functions.py b/tests/test_streams_operations/test_mappers/test_utility_functions.py new file mode 100644 index 0000000..9cae09e --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_utility_functions.py @@ -0,0 +1,248 @@ +"""Tests for utility functions tag() and packet().""" + +from orcabridge.mappers import tag, packet +from orcabridge.streams import SyncStreamFromLists + + +class TestUtilityFunctions: + """Test cases for tag() and packet() utility functions.""" + + def test_tag_function_basic(self): + """Test basic tag() function functionality.""" + tags = [ + {"old_key": "value1", "other": "data1"}, + {"old_key": "value2", "other": "data2"}, + ] + packets = [ + {"data": "packet1"}, + {"data": "packet2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({"old_key": "new_key"}) + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 2 + for (result_tag, result_packet), original_packet in zip(results, packets): + # Tag should be transformed + assert "new_key" in result_tag + assert "old_key" not in result_tag # old key dropped by default + assert result_tag["new_key"] in ["value1", "value2"] + + # Packet should be unchanged + assert result_packet == original_packet + + def test_tag_function_keep_unmapped(self): + """Test tag() function with drop_unmapped=False.""" + tags = [ + {"old_key": "value1", "keep_this": "data1"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({"old_key": "new_key"}, drop_unmapped=False) + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have both mapped and unmapped keys + assert result_tag["new_key"] == "value1" + assert result_tag["keep_this"] == "data1" + + def test_packet_function_basic(self): + """Test basic packet() function functionality.""" + tags = [ + {"tag_data": "tag1"}, + {"tag_data": "tag2"}, + ] + packets = [ + {"old_key": "value1", "other": "data1"}, + {"old_key": "value2", "other": "data2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({"old_key": "new_key"}) + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 2 + for (result_tag, result_packet), original_tag in zip(results, tags): + # Tag should be unchanged + assert result_tag == original_tag + + # Packet should be transformed + assert "new_key" in result_packet + assert "old_key" not in result_packet # old key dropped by default + assert result_packet["new_key"] in ["value1", "value2"] + + def test_packet_function_keep_unmapped(self): + """Test packet() function with drop_unmapped=False.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"old_key": "value1", "keep_this": "data1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({"old_key": "new_key"}, drop_unmapped=False) + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have both mapped and unmapped keys + assert result_packet["new_key"] == "value1" + 
assert result_packet["keep_this"] == "data1" + + def test_tag_function_empty_mapping(self): + """Test tag() function with empty mapping.""" + tags = [ + {"key1": "value1", "key2": "value2"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({}) # Empty mapping + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # With empty mapping and drop_unmapped=True (default), all keys should be dropped + assert result_tag == {} + assert result_packet == packets[0] # Packet unchanged + + def test_packet_function_empty_mapping(self): + """Test packet() function with empty mapping.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"key1": "value1", "key2": "value2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({}) # Empty mapping + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # With empty mapping and drop_unmapped=True (default), all keys should be dropped + assert result_tag == tags[0] # Tag unchanged + assert result_packet == {} + + def test_tag_function_chaining(self): + """Test chaining multiple tag() transformations.""" + tags = [ + {"a": "value1", "b": "value2", "c": "value3"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + + # Chain transformations + tag_mapper1 = tag({"a": "new_a"}, drop_unmapped=False) + tag_mapper2 = tag({"b": "new_b"}, drop_unmapped=False) + + transformed_stream = tag_mapper2(tag_mapper1(stream)) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have transformations from both mappers + assert result_tag["new_a"] == "value1" + assert result_tag["new_b"] == "value2" + assert result_tag["c"] == "value3" # Unchanged + + def test_packet_function_chaining(self): + """Test chaining multiple packet() transformations.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"a": "value1", "b": "value2", "c": "value3"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + + # Chain transformations + packet_mapper1 = packet({"a": "new_a"}, drop_unmapped=False) + packet_mapper2 = packet({"b": "new_b"}, drop_unmapped=False) + + transformed_stream = packet_mapper2(packet_mapper1(stream)) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have transformations from both mappers + assert result_packet["new_a"] == "value1" + assert result_packet["new_b"] == "value2" + assert result_packet["c"] == "value3" # Unchanged + + def test_utility_functions_pickle(self): + """Test that utility functions tag() and packet() are pickleable.""" + import pickle + + # Test tag() function + tag_mapper = tag({"old_key": "new_key"}) + pickled_tag = pickle.dumps(tag_mapper) + unpickled_tag = pickle.loads(pickled_tag) + + # Test that unpickled tag mapper works + assert callable(unpickled_tag) + + # Test packet() function + packet_mapper = packet({"old_key": "new_key"}) + pickled_packet = pickle.dumps(packet_mapper) + unpickled_packet = pickle.loads(pickled_packet) + + # Test that unpickled packet mapper works + assert callable(unpickled_packet) + + def test_utility_functions_with_complex_streams(self, sample_stream): + """Test utility functions with complex 
streams from fixtures."""
+        # Test tag() with sample stream
+        tag_mapper = tag({"file_name": "filename"}, drop_unmapped=False)
+        transformed_stream = tag_mapper(sample_stream)
+
+        results = list(transformed_stream)
+
+        for result_tag, _ in results:
+            assert "filename" in result_tag
+            assert result_tag["filename"] in ["day1", "day2", "day3"]
+            assert "session" in result_tag  # Kept because drop_unmapped=False
+
+        # Test packet() with sample stream
+        packet_mapper = packet({"txt_file": "text_file"}, drop_unmapped=False)
+        transformed_stream = packet_mapper(sample_stream)
+
+        results = list(transformed_stream)
+
+        for _, result_packet in results:
+            assert "text_file" in result_packet
+            assert "data" in result_packet["text_file"]
+            assert "metadata" in result_packet  # Kept because drop_unmapped=False
diff --git a/tests/test_streams_operations/test_pipelines/__init__.py b/tests/test_streams_operations/test_pipelines/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py b/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py
new file mode 100644
index 0000000..75784a7
--- /dev/null
+++ b/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py
@@ -0,0 +1,542 @@
+"""
+Test module for basic pipeline operations.
+
+This module tests fundamental pipeline construction and execution,
+including chaining operations, combining multiple streams, and
+basic data flow patterns as demonstrated in the notebooks.
+"""
+
+import pytest
+import tempfile
+from pathlib import Path
+
+from orcabridge.base import SyncStream
+from orcabridge.streams import SyncStreamFromLists
+from orcabridge.mappers import (
+    Join,
+    Merge,
+    Filter,
+    Transform,
+    MapPackets,
+    MapTags,
+    Repeat,
+    DefaultTag,
+    Batch,
+    FirstMatch,
+)
+from orcabridge.sources import GlobSource
+from orcabridge.pod import FunctionPod
+
+
+@pytest.fixture
+def temp_files():
+    """Create temporary files for testing."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+
+        # Create test files
+        files = {}
+        for i in range(1, 4):
+            file_path = temp_path / f"test_{i}.txt"
+            content = f"Content of file {i}\nLine 2 of file {i}"
+            with open(file_path, "w") as f:
+                f.write(content)
+            files[f"test_{i}.txt"] = file_path
+
+        yield temp_path, files
+
+
+@pytest.fixture
+def sample_user_data():
+    """Sample user data for pipeline testing."""
+    return [
+        ({"user_id": 1, "session": "a"}, {"name": "Alice", "age": 25, "score": 85}),
+        ({"user_id": 2, "session": "a"}, {"name": "Bob", "age": 30, "score": 92}),
+        ({"user_id": 3, "session": "b"}, {"name": "Charlie", "age": 28, "score": 78}),
+        ({"user_id": 1, "session": "b"}, {"name": "Alice", "age": 25, "score": 88}),
+    ]
+
+
+@pytest.fixture
+def sample_metadata():
+    """Sample metadata for joining."""
+    return [
+        ({"user_id": 1}, {"department": "Engineering", "level": "Senior"}),
+        ({"user_id": 2}, {"department": "Marketing", "level": "Junior"}),
+        ({"user_id": 3}, {"department": "Engineering", "level": "Mid"}),
+    ]
+
+
+class TestBasicPipelineConstruction:
+    """Test basic pipeline construction patterns."""
+
+    def test_simple_linear_pipeline(self, sample_user_data):
+        """Test simple linear pipeline with chained operations."""
+        tags, packets = zip(*sample_user_data)
+        source_stream = SyncStreamFromLists(list(tags), list(packets))
+
+        # Build pipeline: filter -> transform -> map packets
+        pipeline = (
+            source_stream
+            >> Filter(lambda tag, packet: packet["age"] >= 28)
+            >>
Transform( + lambda tag, packet: (tag, {**packet, "category": "experienced"}) + ) + >> MapPackets({"name": "full_name", "score": "performance"}) + ) + + result = list(pipeline) + + # Should have filtered out users under 28 + assert len(result) == 3 + + # Check transformations applied + for tag, packet in result: + assert packet["age"] >= 28 + assert packet["category"] == "experienced" + assert "full_name" in packet + assert "performance" in packet + assert "name" not in packet # Should be mapped + assert "score" not in packet # Should be mapped + + def test_pipeline_with_join(self, sample_user_data, sample_metadata): + """Test pipeline with join operation.""" + # Create streams + user_tags, user_packets = zip(*sample_user_data) + meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Join streams on user_id + joined = Join()(user_stream, meta_stream) + result = list(joined) + + # Should have joined records where user_id matches + assert len(result) >= 2 # At least Alice and Bob should match + + # Check that joined data has both user and metadata info + for tag, packet in result: + assert "user_id" in tag + assert "name" in packet # From user data + assert "department" in packet # From metadata + + def test_pipeline_with_merge(self, sample_user_data): + """Test pipeline with merge operation.""" + tags, packets = zip(*sample_user_data) + + # Split data into two streams + stream1 = SyncStreamFromLists(list(tags[:2]), list(packets[:2])) + stream2 = SyncStreamFromLists(list(tags[2:]), list(packets[2:])) + + # Merge streams + merged = Merge()(stream1, stream2) + result = list(merged) + + # Should have all items from both streams + assert len(result) == 4 + + # Order might be different but all data should be present + result_user_ids = [tag["user_id"] for tag, packet in result] + expected_user_ids = [tag["user_id"] for tag, packet in sample_user_data] + assert sorted(result_user_ids) == sorted(expected_user_ids) + + def test_pipeline_with_batch_processing(self, sample_user_data): + """Test pipeline with batch processing.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create batches of size 2 + batched = Batch(batch_size=2)(source_stream) + result = list(batched) + + # Should have 2 batches (4 items / 2 per batch) + assert len(result) == 2 + + # Each result should be a batch + for tag, packet in result: + assert isinstance(packet, list) + assert len(packet) == 2 + # Tag should be batch representation of individual tags + assert isinstance(tag, dict) + + def test_pipeline_with_repeat_operation(self, sample_user_data): + """Test pipeline with repeat operation.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists( + list(tags[:2]), list(packets[:2]) + ) # Use first 2 items + + # Repeat each item 3 times + repeated = Repeat(repeat_count=3)(source_stream) + result = list(repeated) + + # Should have 6 items total (2 original * 3 repeats) + assert len(result) == 6 + + # Check that items are correctly repeated + assert result[0] == result[1] == result[2] # First item repeated + assert result[3] == result[4] == result[5] # Second item repeated + + def test_complex_multi_stage_pipeline(self, sample_user_data, sample_metadata): + """Test complex pipeline with multiple stages and branches.""" + # Create source streams + user_tags, user_packets = zip(*sample_user_data) + 
meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Complex pipeline: + # 1. Add default tags to user stream + # 2. Join with metadata + # 3. Filter by age and score + # 4. Transform and map fields + pipeline = ( + DefaultTag({"source": "user_system"})(user_stream) + * meta_stream # Join operation + >> Filter(lambda tag, packet: packet["age"] >= 25 and packet["score"] >= 80) + >> Transform( + lambda tag, packet: ( + {**tag, "processed": True}, + {**packet, "grade": "A" if packet["score"] >= 90 else "B"}, + ) + ) + >> MapPackets({"name": "employee_name", "department": "dept"}) + ) + + result = list(pipeline) + + # Verify complex transformations + for tag, packet in result: + assert tag["source"] == "user_system" + assert tag["processed"] is True + assert packet["age"] >= 25 + assert packet["score"] >= 80 + assert packet["grade"] in ["A", "B"] + assert "employee_name" in packet + assert "dept" in packet + + def test_pipeline_error_propagation(self, sample_user_data): + """Test that errors propagate correctly through pipeline.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create pipeline with operation that will fail + def failing_transform(tag, packet): + if packet["age"] > 29: + raise ValueError("Age too high!") + return tag, packet + + pipeline = source_stream >> Transform(failing_transform) + + # Should propagate the error + with pytest.raises(ValueError, match="Age too high!"): + list(pipeline) + + def test_pipeline_with_empty_stream(self): + """Test pipeline behavior with empty streams.""" + empty_stream = SyncStreamFromLists([], []) + + # Apply operations to empty stream + pipeline = ( + empty_stream + >> Filter(lambda tag, packet: True) + >> Transform(lambda tag, packet: (tag, {**packet, "processed": True})) + ) + + result = list(pipeline) + assert result == [] + + def test_pipeline_with_first_match(self, sample_user_data, sample_metadata): + """Test pipeline with FirstMatch operation.""" + user_tags, user_packets = zip(*sample_user_data) + meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Use FirstMatch instead of Join + matched = FirstMatch()(user_stream, meta_stream) + result = list(matched) + + # FirstMatch should consume items from both streams + assert len(result) <= len(sample_user_data) + + # Each result should have matched data + for tag, packet in result: + assert "user_id" in tag + assert "name" in packet or "department" in packet + + +class TestPipelineDataFlow: + """Test data flow patterns in pipelines.""" + + def test_data_preservation_through_pipeline(self, sample_user_data): + """Test that data is correctly preserved through transformations.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Track original data + original_user_ids = [tag["user_id"] for tag, packet in sample_user_data] + original_names = [packet["name"] for tag, packet in sample_user_data] + + # Pipeline that shouldn't lose data + pipeline = ( + source_stream + >> MapTags({"user_id": "id"}) # Rename tag field + >> MapPackets({"name": "username"}) # Rename packet field + ) + + result = list(pipeline) + + # Check data preservation + result_ids = [tag["id"] for tag, packet in 
result] + result_names = [packet["username"] for tag, packet in result] + + assert sorted(result_ids) == sorted(original_user_ids) + assert sorted(result_names) == sorted(original_names) + + def test_data_aggregation_pipeline(self, sample_user_data): + """Test pipeline that aggregates data.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Aggregate by session + def aggregate_by_session(tag, packet): + return {"session": tag["session"]}, { + "users": [packet["name"]], + "avg_score": packet["score"], + "count": 1, + } + + # Transform and then batch by session (simplified aggregation) + pipeline = source_stream >> Transform(aggregate_by_session) + + result = list(pipeline) + + # Should have transformed all items + assert len(result) == len(sample_user_data) + + # Check session-based grouping + sessions = [tag["session"] for tag, packet in result] + assert "a" in sessions + assert "b" in sessions + + def test_conditional_processing_pipeline(self, sample_user_data): + """Test pipeline with conditional processing branches.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Split into high and low performers + high_performers = ( + source_stream + >> Filter(lambda tag, packet: packet["score"] >= 85) + >> Transform( + lambda tag, packet: ( + {**tag, "category": "high"}, + {**packet, "bonus": packet["score"] * 0.1}, + ) + ) + ) + + low_performers = ( + source_stream + >> Filter(lambda tag, packet: packet["score"] < 85) + >> Transform( + lambda tag, packet: ( + {**tag, "category": "low"}, + {**packet, "training": True}, + ) + ) + ) + + # Merge results + combined = Merge()(high_performers, low_performers) + result = list(combined) + + # Check that all items are categorized + categories = [tag["category"] for tag, packet in result] + assert "high" in categories + assert "low" in categories + + # Check conditional processing + for tag, packet in result: + if tag["category"] == "high": + assert "bonus" in packet + assert packet["score"] >= 85 + else: + assert "training" in packet + assert packet["score"] < 85 + + +class TestPipelineWithSources: + """Test pipelines starting from sources.""" + + def test_pipeline_from_glob_source(self, temp_files): + """Test pipeline starting from GlobSource.""" + temp_dir, files = temp_files + + # Create source + source = GlobSource(str(temp_dir / "*.txt")) + + # Build pipeline + pipeline = ( + source + >> Transform( + lambda tag, packet: ( + {**tag, "processed": True}, + {**packet, "line_count": len(packet["content"].split("\n"))}, + ) + ) + >> Filter(lambda tag, packet: packet["line_count"] >= 2) + ) + + result = list(pipeline) + + # Should have all files (each has 2 lines) + assert len(result) == 3 + + # Check processing + for tag, packet in result: + assert tag["processed"] is True + assert packet["line_count"] == 2 + assert "path" in tag + + def test_pipeline_with_function_pod(self, sample_user_data): + """Test pipeline with FunctionPod processing.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create processing function + def enrich_user_data(tag, packet): + """Add computed fields to user data.""" + return tag, { + **packet, + "age_group": "young" if packet["age"] < 30 else "mature", + "performance": "excellent" if packet["score"] >= 90 else "good", + } + + # Create pod + processor = FunctionPod(enrich_user_data) + + # Build pipeline + pipeline = ( + source_stream + >> 
processor + >> Filter(lambda tag, packet: packet["performance"] == "excellent") + ) + + result = list(pipeline) + + # Check processing + for tag, packet in result: + assert packet["performance"] == "excellent" + assert packet["age_group"] in ["young", "mature"] + assert packet["score"] >= 90 + + +class TestPipelineOptimization: + """Test pipeline optimization and efficiency.""" + + def test_pipeline_lazy_evaluation(self, sample_user_data): + """Test that pipeline operations are lazily evaluated.""" + call_log = [] + + def logging_transform(tag, packet): + call_log.append(f"processing_{tag['user_id']}") + return tag, packet + + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Build pipeline but don't execute + pipeline = ( + source_stream + >> Transform(logging_transform) + >> Filter(lambda tag, packet: packet["age"] >= 28) + ) + + # No processing should have happened yet + assert call_log == [] + + # Start consuming pipeline + iterator = iter(pipeline) + next(iterator) + + # Now some processing should have happened + assert len(call_log) >= 1 + + def test_pipeline_memory_efficiency(self): + """Test pipeline memory efficiency with large data.""" + + def large_data_generator(): + for i in range(1000): + yield ({"id": i}, {"value": i * 2, "data": f"item_{i}"}) + + # Create pipeline that processes large stream + from orcabridge.stream import SyncStreamFromGenerator + + source = SyncStreamFromGenerator(large_data_generator) + pipeline = ( + source + >> Filter(lambda tag, packet: tag["id"] % 10 == 0) # Keep every 10th item + >> Transform(lambda tag, packet: (tag, {**packet, "filtered": True})) + ) + + # Process in chunks + count = 0 + for tag, packet in pipeline: + assert packet["filtered"] is True + assert tag["id"] % 10 == 0 + count += 1 + + if count >= 10: # Don't process all items + break + + assert count == 10 + + def test_pipeline_error_recovery(self, sample_user_data): + """Test pipeline behavior with partial errors.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def sometimes_failing_transform(tag, packet): + if packet["name"] == "Bob": # Fail for Bob + raise ValueError("Bob processing failed") + return tag, {**packet, "processed": True} + + # This pipeline will fail partway through + pipeline = source_stream >> Transform(sometimes_failing_transform) + + # Should fail when reaching Bob + with pytest.raises(ValueError, match="Bob processing failed"): + list(pipeline) + + def test_pipeline_reusability(self, sample_user_data): + """Test that pipeline components can be reused.""" + # Create reusable operations + age_filter = Filter(lambda tag, packet: packet["age"] >= 28) + score_transform = Transform( + lambda tag, packet: ( + tag, + {**packet, "grade": "A" if packet["score"] >= 90 else "B"}, + ) + ) + + tags, packets = zip(*sample_user_data) + stream1 = SyncStreamFromLists(list(tags[:2]), list(packets[:2])) + stream2 = SyncStreamFromLists(list(tags[2:]), list(packets[2:])) + + # Apply same operations to different streams + pipeline1 = stream1 >> age_filter >> score_transform + pipeline2 = stream2 >> age_filter >> score_transform + + result1 = list(pipeline1) + result2 = list(pipeline2) + + # Both should work independently + for tag, packet in result1 + result2: + if len([tag, packet]) > 0: # If any results + assert packet["age"] >= 28 + assert packet["grade"] in ["A", "B"] diff --git a/tests/test_streams_operations/test_pipelines/test_recursive_features.py 
b/tests/test_streams_operations/test_pipelines/test_recursive_features.py new file mode 100644 index 0000000..2c6daa9 --- /dev/null +++ b/tests/test_streams_operations/test_pipelines/test_recursive_features.py @@ -0,0 +1,637 @@ +""" +Test module for recursive features and advanced pipeline patterns. + +This module tests advanced orcabridge features including recursive stream +operations, label chaining, length operations, source invocation patterns, +and complex pipeline compositions as demonstrated in the notebooks. +""" + +import pytest +import tempfile +from pathlib import Path +from unittest.mock import Mock, patch + +from orcabridge.base import SyncStream, Operation +from orcabridge.stream import SyncStreamFromLists, SyncStreamFromGenerator +from orcabridge.mapper import ( + Join, + Merge, + Filter, + Transform, + MapPackets, + MapTags, + Repeat, + DefaultTag, + Batch, + CacheStream, +) +from orcabridge.sources import GlobSource +from orcabridge.pod import FunctionPod + + +@pytest.fixture +def hierarchical_data(): + """Hierarchical data for testing recursive operations.""" + return [ + ( + {"level": 1, "parent": None, "id": "root"}, + {"name": "Root", "children": ["a", "b"]}, + ), + ( + {"level": 2, "parent": "root", "id": "a"}, + {"name": "Node A", "children": ["a1", "a2"]}, + ), + ( + {"level": 2, "parent": "root", "id": "b"}, + {"name": "Node B", "children": ["b1"]}, + ), + ({"level": 3, "parent": "a", "id": "a1"}, {"name": "Leaf A1", "children": []}), + ({"level": 3, "parent": "a", "id": "a2"}, {"name": "Leaf A2", "children": []}), + ({"level": 3, "parent": "b", "id": "b1"}, {"name": "Leaf B1", "children": []}), + ] + + +@pytest.fixture +def temp_nested_files(): + """Create nested file structure for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create nested directory structure + (temp_path / "level1").mkdir() + (temp_path / "level1" / "level2").mkdir() + + files = {} + + # Root level files + for i in range(3): + file_path = temp_path / f"root_{i}.txt" + with open(file_path, "w") as f: + f.write(f"Root file {i}") + files[f"root_{i}"] = file_path + + # Level 1 files + for i in range(2): + file_path = temp_path / "level1" / f"l1_{i}.txt" + with open(file_path, "w") as f: + f.write(f"Level 1 file {i}") + files[f"l1_{i}"] = file_path + + # Level 2 files + file_path = temp_path / "level1" / "level2" / "l2_0.txt" + with open(file_path, "w") as f: + f.write("Level 2 file") + files["l2_0"] = file_path + + yield temp_path, files + + +class TestRecursiveStreamOperations: + """Test recursive and self-referential stream operations.""" + + def test_recursive_stream_processing(self, hierarchical_data): + """Test recursive processing of hierarchical data.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def process_level(stream, max_level=3): + """Recursively process each level.""" + + def level_processor(tag, packet): + level = tag["level"] + if level < max_level: + # Add processing marker + return tag, {**packet, f"processed_level_{level}": True} + else: + # Leaf nodes get different processing + return tag, {**packet, "is_leaf": True} + + return Transform(level_processor)(stream) + + # Apply recursive processing + processed = process_level(source_stream) + result = list(processed) + + # Check that different levels are processed differently + for tag, packet in result: + level = tag["level"] + if level < 3: + assert f"processed_level_{level}" in packet + else: + assert 
packet["is_leaf"] is True + + def test_recursive_stream_expansion(self, hierarchical_data): + """Test recursive expansion of stream data.""" + # Start with root nodes only + root_data = [item for item in hierarchical_data if item[0]["parent"] is None] + tags, packets = zip(*root_data) + root_stream = SyncStreamFromLists(list(tags), list(packets)) + + def expand_children(tag, packet): + """Generate child nodes for each parent.""" + children = packet.get("children", []) + for child_id in children: + # Find child data from hierarchical_data + for h_tag, h_packet in hierarchical_data: + if h_tag["id"] == child_id: + yield h_tag, h_packet + break + + # Create expanding pod + expander = FunctionPod(expand_children) + expanded = expander(root_stream) + result = list(expanded) + + # Should have expanded to include all children + assert len(result) >= 2 # At least the immediate children + + def test_recursive_filtering_cascade(self, hierarchical_data): + """Test recursive filtering that cascades through levels.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create a cascade of filters for each level + level_1_filter = Filter(lambda tag, packet: tag["level"] == 1) + level_2_filter = Filter(lambda tag, packet: tag["level"] <= 2) + level_3_filter = Filter(lambda tag, packet: tag["level"] <= 3) + + # Apply filters recursively + def recursive_filter(stream, current_level=1): + if current_level == 1: + filtered = level_1_filter(stream) + elif current_level == 2: + filtered = level_2_filter(stream) + else: + filtered = level_3_filter(stream) + + return filtered + + # Test each level + level_1_result = list(recursive_filter(source_stream, 1)) + level_2_result = list(recursive_filter(source_stream, 2)) + level_3_result = list(recursive_filter(source_stream, 3)) + + assert len(level_1_result) == 1 # Only root + assert len(level_2_result) == 3 # Root + level 2 nodes + assert len(level_3_result) == 6 # All nodes + + def test_self_referential_stream_operations(self, hierarchical_data): + """Test operations that reference the stream itself.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Cache the stream for self-reference + cache = CacheStream() + cached_stream = cache(source_stream) + + # Consume the cache + list(cached_stream) + + # Now create operations that reference the cached data + def find_parent_info(tag, packet): + parent_id = tag.get("parent") + if parent_id: + # Look up parent in cached stream + for cached_tag, cached_packet in cache.cache: + if cached_tag["id"] == parent_id: + return tag, { + **packet, + "parent_name": cached_packet["name"], + "parent_level": cached_tag["level"], + } + return tag, {**packet, "parent_name": None, "parent_level": None} + + # Apply parent lookup + enriched = Transform(find_parent_info)(cached_stream) + result = list(enriched) + + # Check parent information was added + for tag, packet in result: + if tag["parent"] is not None: + assert packet["parent_name"] is not None + assert packet["parent_level"] is not None + + +class TestLabelAndLengthOperations: + """Test label manipulation and length operations.""" + + def test_label_chaining_operations(self, hierarchical_data): + """Test chaining operations with label tracking.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists( + list(tags), list(packets), label="hierarchical_source" + ) + + # Create labeled operations + filter_op = Filter(lambda tag, packet: 
tag["level"] <= 2) + transform_op = Transform( + lambda tag, packet: (tag, {**packet, "processed": True}) + ) + + # Apply operations and track labels + filtered = filter_op(source_stream) + assert filtered.label.startswith("Filter_") + + transformed = transform_op(filtered) + assert transformed.label.startswith("Transform_") + + # Check that invocation chain is maintained + result = list(transformed) + assert len(result) == 3 # Root + 2 level-2 nodes + + def test_stream_length_operations(self, hierarchical_data): + """Test operations that depend on stream length.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def length_dependent_transform(tag, packet): + # This would need to know stream length + # For simulation, we'll use a mock length + stream_length = 6 # Known length of hierarchical_data + return tag, { + **packet, + "relative_position": tag["level"] / 3, # Relative to max level + "is_majority_level": tag["level"] == 3, # Most nodes are level 3 + } + + processed = Transform(length_dependent_transform)(source_stream) + result = list(processed) + + # Check length-dependent calculations + for tag, packet in result: + assert "relative_position" in packet + assert "is_majority_level" in packet + if tag["level"] == 3: + assert packet["is_majority_level"] is True + + def test_dynamic_label_generation(self, hierarchical_data): + """Test dynamic label generation based on stream content.""" + tags, packets = zip(*hierarchical_data) + + # Create streams with content-based labels + def create_labeled_stream(data, label_func): + stream_tags, stream_packets = zip(*data) + label = label_func(data) + return SyncStreamFromLists( + list(stream_tags), list(stream_packets), label=label + ) + + # Different labeling strategies + level_1_data = [item for item in hierarchical_data if item[0]["level"] == 1] + level_2_data = [item for item in hierarchical_data if item[0]["level"] == 2] + level_3_data = [item for item in hierarchical_data if item[0]["level"] == 3] + + stream_1 = create_labeled_stream( + level_1_data, lambda data: f"level_1_stream_{len(data)}_items" + ) + stream_2 = create_labeled_stream( + level_2_data, lambda data: f"level_2_stream_{len(data)}_items" + ) + stream_3 = create_labeled_stream( + level_3_data, lambda data: f"level_3_stream_{len(data)}_items" + ) + + assert stream_1.label == "level_1_stream_1_items" + assert stream_2.label == "level_2_stream_2_items" + assert stream_3.label == "level_3_stream_3_items" + + +class TestSourceInvocationPatterns: + """Test advanced source invocation and composition patterns.""" + + def test_multiple_source_composition(self, temp_nested_files): + """Test composing multiple sources with different patterns.""" + temp_path, files = temp_nested_files + + # Create different sources for different levels + root_source = GlobSource(str(temp_path / "*.txt"), label="root_files") + level1_source = GlobSource( + str(temp_path / "level1" / "*.txt"), label="level1_files" + ) + level2_source = GlobSource( + str(temp_path / "level1" / "level2" / "*.txt"), label="level2_files" + ) + + # Compose sources + all_sources = Merge()(root_source, level1_source, level2_source) + result = list(all_sources) + + # Should have files from all levels + assert len(result) >= 6 # 3 root + 2 level1 + 1 level2 + + # Check that files from different levels are included + paths = [tag["path"] for tag, packet in result] + assert any("root_" in str(path) for path in paths) + assert any("l1_" in str(path) for path in paths) + 
assert any("l2_" in str(path) for path in paths) + + def test_conditional_source_invocation(self, temp_nested_files): + """Test conditional source invocation based on data content.""" + temp_path, files = temp_nested_files + + def conditional_source_factory(condition): + """Create source based on condition.""" + if condition == "root": + return GlobSource(str(temp_path / "*.txt")) + elif condition == "nested": + return GlobSource(str(temp_path / "**" / "*.txt")) + else: + return SyncStreamFromLists([], []) # Empty stream + + # Test different conditions + root_stream = conditional_source_factory("root") + nested_stream = conditional_source_factory("nested") + empty_stream = conditional_source_factory("other") + + root_result = list(root_stream) + nested_result = list(nested_stream) + empty_result = list(empty_stream) + + assert len(root_result) == 3 # Only root files + assert len(nested_result) >= 6 # All files recursively + assert len(empty_result) == 0 + + def test_recursive_source_generation(self, temp_nested_files): + """Test recursive generation of sources.""" + temp_path, files = temp_nested_files + + def recursive_file_processor(tag, packet): + """Process file and potentially generate more sources.""" + file_path = Path(tag["path"]) + + # If this is a directory-like file, yield info about subdirectories + if "level1" in str(file_path.parent): + # This file is in level1, so it knows about level2 + yield tag, {**packet, "has_subdirs": True, "subdir_count": 1} + else: + yield tag, {**packet, "has_subdirs": False, "subdir_count": 0} + + # Start with root source + root_source = GlobSource(str(temp_path / "*.txt")) + + # Apply recursive processing + processor = FunctionPod(recursive_file_processor) + processed = processor(root_source) + result = list(processed) + + # Check recursive information + for tag, packet in result: + assert "has_subdirs" in packet + assert "subdir_count" in packet + + def test_source_caching_and_reuse(self, temp_nested_files): + """Test caching and reusing source results.""" + temp_path, files = temp_nested_files + + # Create cached source + source = GlobSource(str(temp_path / "*.txt")) + cache = CacheStream() + cached_source = cache(source) + + # First consumption + result1 = list(cached_source) + + # Verify caching worked + assert cache.is_cached + assert len(cache.cache) == 3 + + # Create new operations using cached source + filter_op = Filter(lambda tag, packet: "root_1" in str(tag["path"])) + transform_op = Transform(lambda tag, packet: (tag, {**packet, "reused": True})) + + # Apply operations to cached source + filtered = filter_op(cache()) # Use cached version + transformed = transform_op(cache()) # Use cached version again + + filter_result = list(filtered) + transform_result = list(transformed) + + # Both should work independently using cached data + assert len(filter_result) == 1 # Only root_1 file + assert len(transform_result) == 3 # All files with reused flag + + for tag, packet in transform_result: + assert packet["reused"] is True + + +class TestComplexPipelinePatterns: + """Test complex pipeline patterns and compositions.""" + + def test_branching_and_merging_pipeline(self, hierarchical_data): + """Test pipeline that branches and merges back together.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create branches for different processing paths + branch_a = ( + source_stream + >> Filter(lambda tag, packet: tag["level"] <= 2) + >> Transform( + lambda tag, packet: (tag, {**packet, 
"branch": "A", "priority": "high"}) + ) + ) + + branch_b = ( + source_stream + >> Filter(lambda tag, packet: tag["level"] == 3) + >> Transform( + lambda tag, packet: (tag, {**packet, "branch": "B", "priority": "low"}) + ) + ) + + # Merge branches back together + merged = Merge()(branch_a, branch_b) + result = list(merged) + + # Should have all original items but with branch processing + assert len(result) == 6 + + # Check branch assignments + branches = [packet["branch"] for tag, packet in result] + assert "A" in branches + assert "B" in branches + + def test_multi_level_pipeline_composition(self, hierarchical_data): + """Test multi-level pipeline composition with nested operations.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Level 1: Basic filtering and transformation + level1_pipeline = ( + source_stream + >> Filter(lambda tag, packet: len(packet["name"]) > 5) + >> Transform( + lambda tag, packet: (tag, {**packet, "level1_processed": True}) + ) + ) + + # Level 2: Advanced processing based on level 1 + level2_pipeline = ( + level1_pipeline + >> MapTags({"level": "hierarchy_level", "id": "node_id"}) + >> MapPackets({"name": "node_name", "children": "child_nodes"}) + ) + + # Level 3: Final aggregation and summary + level3_pipeline = level2_pipeline >> Transform( + lambda tag, packet: ( + tag, + { + **packet, + "final_processed": True, + "child_count": len(packet["child_nodes"]), + "has_children": len(packet["child_nodes"]) > 0, + }, + ) + ) + + result = list(level3_pipeline) + + # Check multi-level processing + for tag, packet in result: + assert packet["level1_processed"] is True + assert packet["final_processed"] is True + assert "hierarchy_level" in tag + assert "node_name" in packet + assert "child_count" in packet + + def test_pipeline_with_feedback_loop(self, hierarchical_data): + """Test pipeline pattern that simulates feedback loops.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create a cache to simulate feedback + feedback_cache = CacheStream() + + # First pass: process and cache + first_pass = ( + source_stream + >> Transform(lambda tag, packet: (tag, {**packet, "pass": 1})) + >> feedback_cache + ) + + # Consume first pass to populate cache + first_result = list(first_pass) + + # Second pass: use cached data for enrichment + def enrich_with_feedback(tag, packet): + # Use cached data to enrich current item + related_items = [] + for cached_tag, cached_packet in feedback_cache.cache: + if ( + cached_tag["level"] == tag["level"] + and cached_tag["id"] != tag["id"] + ): + related_items.append(cached_packet["name"]) + + return tag, { + **packet, + "pass": 2, + "related_items": related_items, + "relation_count": len(related_items), + } + + second_pass = Transform(enrich_with_feedback)(feedback_cache()) + second_result = list(second_pass) + + # Check feedback enrichment + for tag, packet in second_result: + assert packet["pass"] == 2 + assert "related_items" in packet + assert "relation_count" in packet + + def test_pipeline_error_handling_and_recovery(self, hierarchical_data): + """Test pipeline error handling and recovery patterns.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def potentially_failing_operation(tag, packet): + # Fail on specific condition + if tag["id"] == "a1": # Fail on specific node + raise ValueError("Processing failed for a1") + return tag, {**packet, 
"processed": True} + + # Create error-tolerant pipeline + def error_tolerant_transform(tag, packet): + try: + return potentially_failing_operation(tag, packet) + except ValueError: + # Recovery: mark as failed but continue + return tag, {**packet, "processed": False, "error": True} + + pipeline = Transform(error_tolerant_transform)(source_stream) + result = list(pipeline) + + # Should have processed all items despite error + assert len(result) == 6 + + # Check error handling + failed_items = [ + item for tag, packet in result for item in [packet] if packet.get("error") + ] + successful_items = [ + item + for tag, packet in result + for item in [packet] + if packet.get("processed") + ] + + assert len(failed_items) == 1 # One failed item + assert len(successful_items) == 5 # Five successful items + + def test_dynamic_pipeline_construction(self, hierarchical_data): + """Test dynamic construction of pipelines based on data characteristics.""" + tags, packets = zip(*hierarchical_data) + + def build_dynamic_pipeline(data): + """Build pipeline based on data characteristics.""" + # Analyze data + levels = set(tag["level"] for tag, packet in data) + max_level = max(levels) + has_children = any(len(packet["children"]) > 0 for tag, packet in data) + + # Build pipeline dynamically + base_stream = SyncStreamFromLists( + [tag for tag, packet in data], [packet for tag, packet in data] + ) + + operations = [base_stream] + + # Add level-specific processing + if max_level > 2: + operations.append( + Transform( + lambda tag, packet: (tag, {**packet, "is_deep_hierarchy": True}) + ) + ) + + # Add child processing if needed + if has_children: + operations.append( + Transform( + lambda tag, packet: ( + tag, + { + **packet, + "child_info": f"has_{len(packet['children'])}_children", + }, + ) + ) + ) + + # Chain operations + pipeline = operations[0] + for op in operations[1:]: + if isinstance(op, Transform): + pipeline = op(pipeline) + + return pipeline + + # Build and execute dynamic pipeline + dynamic_pipeline = build_dynamic_pipeline(hierarchical_data) + result = list(dynamic_pipeline) + + # Check dynamic processing + for tag, packet in result: + assert "is_deep_hierarchy" in packet # Should be added due to max_level > 2 + assert "child_info" in packet # Should be added due to has_children diff --git a/tests/test_streams_operations/test_pods/__init__.py b/tests/test_streams_operations/test_pods/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_pods/test_function_pod.py b/tests/test_streams_operations/test_pods/test_function_pod.py new file mode 100644 index 0000000..1b1f0a8 --- /dev/null +++ b/tests/test_streams_operations/test_pods/test_function_pod.py @@ -0,0 +1,305 @@ +"""Tests for FunctionPod functionality.""" + +import pytest +from orcabridge.pod import FunctionPod +from orcabridge.stream import SyncStreamFromLists + + +class TestFunctionPod: + """Test cases for FunctionPod.""" + + def test_function_pod_no_output(self, sample_stream, func_no_output): + """Test function pod with function that has no output.""" + pod = FunctionPod(func_no_output) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce no output + assert len(result) == 0 + + def test_function_pod_single_output(self, sample_stream, func_single_output): + """Test function pod with function that has single output.""" + pod = FunctionPod(func_single_output) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce one output 
per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) + + for i, (packet, tag) in enumerate(result): + expected_packet = f"processed_{original_packets[i][0]}" + assert packet == expected_packet + + def test_function_pod_multiple_outputs(self, sample_stream, func_multiple_outputs): + """Test function pod with function that has multiple outputs.""" + pod = FunctionPod(func_multiple_outputs) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce two outputs per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) * 2 + + # Check that we get pairs of outputs + for i in range(0, len(result), 2): + original_idx = i // 2 + original_packet = original_packets[original_idx][0] + + # First output should be the packet itself + assert result[i][0] == original_packet + # Second output should be uppercased + assert result[i + 1][0] == str(original_packet).upper() + + def test_function_pod_error_function(self, sample_stream, func_with_error): + """Test function pod with function that raises error.""" + pod = FunctionPod(func_with_error) + result_stream = pod(sample_stream) + + # Should raise error when processing + with pytest.raises(ValueError, match="Function error"): + list(result_stream) + + def test_function_pod_with_datastore(self, func_single_output, data_store): + """Test function pod with datastore integration.""" + + # Create a function that uses the datastore + def datastore_function(inputs, datastore): + packet, tag = inputs[0] + # Store and retrieve from datastore + datastore["processed_count"] = datastore.get("processed_count", 0) + 1 + return f"item_{datastore['processed_count']}_{packet}" + + pod = FunctionPod(datastore_function, datastore=data_store) + + packets = ["a", "b", "c"] + tags = ["tag1", "tag2", "tag3"] + stream = SyncStreamFromLists(packets, tags) + + result_stream = pod(stream) + result = list(result_stream) + + # Should use datastore to track processing + expected = [("item_1_a", "tag1"), ("item_2_b", "tag2"), ("item_3_c", "tag3")] + assert result == expected + assert data_store["processed_count"] == 3 + + def test_function_pod_different_input_counts(self): + """Test function pod with functions expecting different input counts.""" + + # Function expecting 1 input + def single_input_func(inputs): + packet, tag = inputs[0] + return f"single_{packet}" + + # Function expecting 2 inputs + def double_input_func(inputs): + if len(inputs) < 2: + return None # Not enough inputs + packet1, tag1 = inputs[0] + packet2, tag2 = inputs[1] + return f"combined_{packet1}_{packet2}" + + packets = ["a", "b", "c", "d"] + tags = ["t1", "t2", "t3", "t4"] + stream = SyncStreamFromLists(packets, tags) + + # Test single input function + pod1 = FunctionPod(single_input_func) + result1 = list(pod1(stream)) + + assert len(result1) == 4 + assert result1[0][0] == "single_a" + assert result1[1][0] == "single_b" + + # Test double input function (if supported) + # This behavior depends on FunctionPod implementation + try: + pod2 = FunctionPod(double_input_func, input_count=2) + stream2 = SyncStreamFromLists(packets, tags) + result2 = list(pod2(stream2)) + + # Should produce fewer outputs since it needs 2 inputs per call + assert len(result2) <= len(packets) + + except (TypeError, AttributeError): + # FunctionPod might not support configurable input counts + pass + + def test_function_pod_with_none_outputs(self, sample_stream): + """Test function pod with function that sometimes returns 
None.""" + + def conditional_function(inputs): + packet, tag = inputs[0] + # Only process strings + if isinstance(packet, str): + return f"processed_{packet}" + return None # Skip non-strings + + # Mix of string and non-string packets + packets = ["hello", 42, "world", None, "test"] + tags = ["str1", "int1", "str2", "null1", "str3"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(conditional_function) + result_stream = pod(stream) + result = list(result_stream) + + # Should only process string packets + string_packets = [p for p in packets if isinstance(p, str)] + assert len(result) == len(string_packets) + + for packet, _ in result: + assert packet.startswith("processed_") + + def test_function_pod_stateful_function(self, data_store): + """Test function pod with stateful function using datastore.""" + + def stateful_function(inputs, datastore): + packet, tag = inputs[0] + + # Keep running total + if "total" not in datastore: + datastore["total"] = 0 + if "count" not in datastore: + datastore["count"] = 0 + + if isinstance(packet, (int, float)): + datastore["total"] += packet + datastore["count"] += 1 + avg = datastore["total"] / datastore["count"] + return f"avg_so_far_{avg:.2f}" + + return None + + packets = [10, 20, 30, 40] + tags = ["n1", "n2", "n3", "n4"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(stateful_function, datastore=data_store) + result_stream = pod(stream) + result = list(result_stream) + + # Should produce running averages + assert len(result) == 4 + assert result[0][0] == "avg_so_far_10.00" # 10/1 + assert result[1][0] == "avg_so_far_15.00" # (10+20)/2 + assert result[2][0] == "avg_so_far_20.00" # (10+20+30)/3 + assert result[3][0] == "avg_so_far_25.00" # (10+20+30+40)/4 + + def test_function_pod_generator_output(self, sample_stream): + """Test function pod with function that yields multiple outputs.""" + + def generator_function(inputs): + packet, tag = inputs[0] + # Yield multiple outputs for each input + for i in range(3): + yield f"{packet}_part_{i}" + + pod = FunctionPod(generator_function) + result_stream = pod(sample_stream) + result = list(result_stream) + + # Should produce 3 outputs per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) * 3 + + # Check pattern of outputs + for i, (packet, tag) in enumerate(result): + original_idx = i // 3 + part_idx = i % 3 + original_packet = original_packets[original_idx][0] + expected_packet = f"{original_packet}_part_{part_idx}" + assert packet == expected_packet + + def test_function_pod_complex_data_transformation(self): + """Test function pod with complex data transformation.""" + + def json_processor(inputs): + packet, tag = inputs[0] + + if isinstance(packet, dict): + # Extract all values and create separate outputs + for key, value in packet.items(): + yield f"{key}={value}" + else: + yield f"non_dict_{packet}" + + packets = [ + {"name": "Alice", "age": 30}, + "simple_string", + {"x": 1, "y": 2, "z": 3}, + ] + tags = ["person", "text", "coordinates"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(json_processor) + result_stream = pod(stream) + result = list(result_stream) + + # Should extract dict entries + result_packets = [packet for packet, _ in result] + + assert "name=Alice" in result_packets + assert "age=30" in result_packets + assert "non_dict_simple_string" in result_packets + assert "x=1" in result_packets + assert "y=2" in result_packets + assert "z=3" in result_packets + + def 
test_function_pod_empty_stream(self, func_single_output): + """Test function pod with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + pod = FunctionPod(func_single_output) + result_stream = pod(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_function_pod_large_stream(self, func_single_output): + """Test function pod with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(func_single_output) + result_stream = pod(stream) + + # Process stream lazily to test memory efficiency + count = 0 + for packet, tag in result_stream: + count += 1 + if count == 100: # Stop early + break + + assert count == 100 + + def test_function_pod_chaining(self, func_single_output): + """Test chaining function pods.""" + + def second_processor(inputs): + packet, tag = inputs[0] + return f"second_{packet}" + + packets = ["a", "b", "c"] + tags = ["t1", "t2", "t3"] + stream = SyncStreamFromLists(packets, tags) + + # Chain two function pods + pod1 = FunctionPod(func_single_output) + pod2 = FunctionPod(second_processor) + + intermediate_stream = pod1(stream) + final_stream = pod2(intermediate_stream) + result = list(final_stream) + + # Should apply both transformations + expected = [ + ("second_processed_a", "t1"), + ("second_processed_b", "t2"), + ("second_processed_c", "t3"), + ] + assert result == expected diff --git a/tests/test_streams_operations/test_pods/test_function_pod_datastore.py b/tests/test_streams_operations/test_pods/test_function_pod_datastore.py new file mode 100644 index 0000000..e3a2fa4 --- /dev/null +++ b/tests/test_streams_operations/test_pods/test_function_pod_datastore.py @@ -0,0 +1,403 @@ +""" +Test module for FunctionPod datastore integration. + +This module tests FunctionPod functionality when working with datastore operations, +including storage, retrieval, and state management across pod invocations. 
+"""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from orcabridge.pod import FunctionPod
+from orcabridge.streams import SyncStreamFromLists
+
+
+@pytest.fixture
+def temp_datastore():
+    """Create a temporary datastore directory."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        yield Path(temp_dir)
+
+
+@pytest.fixture
+def sample_stream_data():
+    """Sample stream data for testing."""
+    return [
+        ({"file_id": 1}, {"content": "Hello World", "metadata": {"type": "text"}}),
+        ({"file_id": 2}, {"content": "Python Code", "metadata": {"type": "code"}}),
+        (
+            {"file_id": 3},
+            {"content": "Data Analysis", "metadata": {"type": "analysis"}},
+        ),
+    ]
+
+
+@pytest.fixture
+def sample_stream(sample_stream_data):
+    """Create a sample stream."""
+    tags, packets = zip(*sample_stream_data)
+    return SyncStreamFromLists(list(tags), list(packets))
+
+
+class TestFunctionPodDatastore:
+    """Test cases for FunctionPod datastore integration."""
+
+    def test_datastore_saving_function(self, temp_datastore, sample_stream, sample_stream_data):
+        """Test FunctionPod with function that saves data to datastore."""
+
+        def save_to_datastore(tag, packet, datastore):
+            """Save packet content to datastore."""
+            file_id = tag["file_id"]
+            content = packet["content"]
+
+            # Create file path
+            file_path = datastore / f"file_{file_id}.txt"
+
+            # Save content
+            with open(file_path, "w") as f:
+                f.write(content)
+
+            # Return tag and packet with file path
+            return tag, {**packet, "saved_path": str(file_path)}
+
+        # Create pod with datastore
+        pod = FunctionPod(save_to_datastore, datastore=temp_datastore)
+
+        # Process stream
+        result_stream = pod(sample_stream)
+        result = list(result_stream)
+
+        # Check results
+        assert len(result) == 3
+
+        # Verify files were created
+        for i, (tag, packet) in enumerate(result, 1):
+            expected_path = temp_datastore / f"file_{i}.txt"
+            assert expected_path.exists()
+
+            # Verify content
+            with open(expected_path, "r") as f:
+                saved_content = f.read()
+
+            original_content = sample_stream_data[i - 1][1]["content"]
+            assert saved_content == original_content
+
+            # Verify packet contains path
+            assert "saved_path" in packet
+            assert packet["saved_path"] == str(expected_path)
+
+    def test_datastore_loading_function(self, temp_datastore):
+        """Test FunctionPod with function that loads data from datastore."""
+
+        # First, create some test files
+        test_files = {
+            "file1.txt": "Content of file 1",
+            "file2.txt": "Content of file 2",
+            "file3.txt": "Content of file 3",
+        }
+
+        for filename, content in test_files.items():
+            file_path = temp_datastore / filename
+            with open(file_path, "w") as f:
+                f.write(content)
+
+        def load_from_datastore(tag, packet, datastore):
+            """Load content from datastore based on filename in packet."""
+            filename = packet["filename"]
+            file_path = datastore / filename
+
+            if file_path.exists():
+                with open(file_path, "r") as f:
+                    content = f.read()
+                return tag, {**packet, "content": content, "loaded": True}
+            else:
+                return tag, {**packet, "content": None, "loaded": False}
+
+        # Create input stream with filenames
+        tags = [{"request_id": i} for i in range(1, 4)]
+        packets = [{"filename": f"file{i}.txt"} for i in range(1, 4)]
+        input_stream = SyncStreamFromLists(tags, packets)
+
+        # Create pod with datastore
+        pod = FunctionPod(load_from_datastore, datastore=temp_datastore)
+
+        # Process stream
+        result_stream = pod(input_stream)
+        result = list(result_stream)
+
+        # Check results
+        assert len(result) == 3
+
+        for i, (tag, packet) in enumerate(result):
+            assert 
packet["loaded"] is True + assert packet["content"] == f"Content of file {i + 1}" + assert packet["filename"] == f"file{i + 1}.txt" + + def test_datastore_with_stateful_operations(self, temp_datastore): + """Test FunctionPod with stateful operations using datastore.""" + + def stateful_counter(tag, packet, datastore): + """Maintain a counter in datastore across invocations.""" + counter_file = datastore / "counter.txt" + + # Read current counter value + if counter_file.exists(): + with open(counter_file, "r") as f: + count = int(f.read().strip()) + else: + count = 0 + + # Increment counter + count += 1 + + # Save new counter value + with open(counter_file, "w") as f: + f.write(str(count)) + + return tag, {**packet, "sequence_number": count} + + # Create multiple input streams to test state persistence + tags1 = [{"batch": 1, "item": i} for i in range(3)] + packets1 = [{"data": f"item_{i}"} for i in range(3)] + stream1 = SyncStreamFromLists(tags1, packets1) + + tags2 = [{"batch": 2, "item": i} for i in range(2)] + packets2 = [{"data": f"item_{i}"} for i in range(2)] + stream2 = SyncStreamFromLists(tags2, packets2) + + # Create pod with datastore + pod = FunctionPod(stateful_counter, datastore=temp_datastore) + + # Process first stream + result1 = list(pod(stream1)) + + # Process second stream (should continue counting) + result2 = list(pod(stream2)) + + # Check that counter state persisted across streams + expected_sequences1 = [1, 2, 3] + expected_sequences2 = [4, 5] + + for i, (tag, packet) in enumerate(result1): + assert packet["sequence_number"] == expected_sequences1[i] + + for i, (tag, packet) in enumerate(result2): + assert packet["sequence_number"] == expected_sequences2[i] + + def test_datastore_error_handling(self, temp_datastore): + """Test error handling when datastore operations fail.""" + + def failing_datastore_operation(tag, packet, datastore): + """Function that tries to access non-existent file.""" + nonexistent_file = datastore / "nonexistent.txt" + + # This should raise an exception + with open(nonexistent_file, "r") as f: + content = f.read() + + return tag, {**packet, "content": content} + + tags = [{"id": 1}] + packets = [{"data": "test"}] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(failing_datastore_operation, datastore=temp_datastore) + result_stream = pod(stream) + + # Should propagate the file not found error + with pytest.raises(FileNotFoundError): + list(result_stream) + + def test_datastore_with_subdirectories(self, temp_datastore): + """Test FunctionPod with datastore operations using subdirectories.""" + + def organize_by_type(tag, packet, datastore): + """Organize files by type in subdirectories.""" + file_type = packet["type"] + content = packet["content"] + file_id = tag["id"] + + # Create subdirectory + type_dir = datastore / file_type + type_dir.mkdir(exist_ok=True) + + # Save file in subdirectory + file_path = type_dir / f"{file_id}.txt" + with open(file_path, "w") as f: + f.write(content) + + return tag, {**packet, "organized_path": str(file_path)} + + # Create input with different types + tags = [{"id": f"file_{i}"} for i in range(4)] + packets = [ + {"type": "documents", "content": "Document content 1"}, + {"type": "images", "content": "Image metadata 1"}, + {"type": "documents", "content": "Document content 2"}, + {"type": "code", "content": "Python code"}, + ] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(organize_by_type, datastore=temp_datastore) + result = list(pod(stream)) + + # Check that 
subdirectories were created + assert (temp_datastore / "documents").exists() + assert (temp_datastore / "images").exists() + assert (temp_datastore / "code").exists() + + # Check that files were saved in correct subdirectories + assert (temp_datastore / "documents" / "file_0.txt").exists() + assert (temp_datastore / "images" / "file_1.txt").exists() + assert (temp_datastore / "documents" / "file_2.txt").exists() + assert (temp_datastore / "code" / "file_3.txt").exists() + + def test_datastore_without_datastore_param(self): + """Test that function without datastore parameter works normally.""" + + def simple_function(tag, packet): + """Function that doesn't use datastore.""" + return tag, {**packet, "processed": True} + + # This should work even though we don't provide datastore + pod = FunctionPod(simple_function) + + tags = [{"id": 1}] + packets = [{"data": "test"}] + stream = SyncStreamFromLists(tags, packets) + + result = list(pod(stream)) + assert len(result) == 1 + assert result[0][1]["processed"] is True + + def test_datastore_metadata_operations(self, temp_datastore): + """Test FunctionPod with metadata tracking in datastore.""" + + def track_processing_metadata(tag, packet, datastore): + """Track processing metadata for each item.""" + import time + import json + + item_id = tag["id"] + processing_time = time.time() + + # Create metadata entry + metadata = { + "item_id": item_id, + "processed_at": processing_time, + "original_data": packet["data"], + "processing_status": "completed", + } + + # Save metadata + metadata_file = datastore / f"metadata_{item_id}.json" + with open(metadata_file, "w") as f: + json.dump(metadata, f) + + return tag, {**packet, "metadata_file": str(metadata_file)} + + tags = [{"id": f"item_{i}"} for i in range(3)] + packets = [{"data": f"data_{i}"} for i in range(3)] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(track_processing_metadata, datastore=temp_datastore) + result = list(pod(stream)) + + # Check that metadata files were created + for i in range(3): + metadata_file = temp_datastore / f"metadata_item_{i}.json" + assert metadata_file.exists() + + # Verify metadata content + import json + + with open(metadata_file, "r") as f: + metadata = json.load(f) + + assert metadata["item_id"] == f"item_{i}" + assert metadata["original_data"] == f"data_{i}" + assert metadata["processing_status"] == "completed" + assert "processed_at" in metadata + + def test_datastore_with_generator_function(self, temp_datastore): + """Test FunctionPod with generator function that uses datastore.""" + + def split_and_save(tag, packet, datastore): + """Split content and save each part separately.""" + content = packet["content"] + parts = content.split() + base_id = tag["id"] + + for i, part in enumerate(parts): + part_id = f"{base_id}_part_{i}" + + # Save part to datastore + part_file = datastore / f"{part_id}.txt" + with open(part_file, "w") as f: + f.write(part) + + # Yield new tag-packet pair + new_tag = {**tag, "part_id": part_id, "part_index": i} + new_packet = {"part_content": part, "saved_to": str(part_file)} + yield new_tag, new_packet + + tags = [{"id": "doc1"}] + packets = [{"content": "Hello World Python Programming"}] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(split_and_save, datastore=temp_datastore) + result = list(pod(stream)) + + # Should have 4 parts + assert len(result) == 4 + + expected_parts = ["Hello", "World", "Python", "Programming"] + for i, (tag, packet) in enumerate(result): + assert tag["part_index"] == i + 
assert packet["part_content"] == expected_parts[i]
+
+            # Check that file was saved
+            saved_file = Path(packet["saved_to"])
+            assert saved_file.exists()
+
+            with open(saved_file, "r") as f:
+                saved_content = f.read()
+            assert saved_content == expected_parts[i]
+
+    def test_datastore_path_validation(self, temp_datastore):
+        """Test that datastore path is properly validated and accessible."""
+
+        def check_datastore_access(tag, packet, datastore):
+            """Function that checks datastore accessibility."""
+            # Check if datastore is a Path object
+            assert isinstance(datastore, Path)
+
+            # Check if datastore directory exists and is writable
+            assert datastore.exists()
+            assert datastore.is_dir()
+
+            # Test writing and reading
+            test_file = datastore / "access_test.txt"
+            with open(test_file, "w") as f:
+                f.write("test")
+
+            with open(test_file, "r") as f:
+                content = f.read()
+
+            assert content == "test"
+
+            # Clean up
+            test_file.unlink()
+
+            return tag, {**packet, "datastore_accessible": True}
+
+        tags = [{"id": 1}]
+        packets = [{"data": "test"}]
+        stream = SyncStreamFromLists(tags, packets)
+
+        pod = FunctionPod(check_datastore_access, datastore=temp_datastore)
+        result = list(pod(stream))
+
+        assert result[0][1]["datastore_accessible"] is True
diff --git a/tests/test_streams_operations/test_pods/test_pod_base.py b/tests/test_streams_operations/test_pods/test_pod_base.py
new file mode 100644
index 0000000..8c79a9d
--- /dev/null
+++ b/tests/test_streams_operations/test_pods/test_pod_base.py
@@ -0,0 +1,274 @@
+"""Tests for base Pod functionality."""
+
+import pytest
+from orcabridge.pod import Pod
+from orcabridge.streams import SyncStreamFromLists
+
+
+class TestPodBase:
+    """Test cases for base Pod class."""
+
+    def test_pod_creation(self):
+        """Test basic pod creation."""
+        pod = Pod()
+        assert pod is not None
+
+    def test_pod_call_interface(self, sample_stream):
+        """Test that pod implements callable interface."""
+        pod = Pod()
+
+        # Base Pod should be callable, but might not do anything useful
+        # This tests the interface exists
+        try:
+            result_stream = pod(sample_stream)
+            # If it succeeds, result should be a stream
+            assert hasattr(result_stream, "__iter__")
+        except NotImplementedError:
+            # Base Pod might not implement __call__
+            pass
+
+    def test_pod_with_empty_stream(self):
+        """Test pod with empty stream."""
+        empty_stream = SyncStreamFromLists([], [])
+        pod = Pod()
+
+        try:
+            result_stream = pod(empty_stream)
+            result = list(result_stream)
+            # If implemented, should handle empty stream
+            assert isinstance(result, list)
+        except NotImplementedError:
+            # Base Pod might not implement functionality
+            pass
+
+    def test_pod_inheritance(self):
+        """Test that Pod can be inherited."""
+
+        class CustomPod(Pod):
+            def __call__(self, stream):
+                # Simple pass-through implementation
+                for packet, tag in stream:
+                    yield packet, tag
+
+        custom_pod = CustomPod()
+        packets = ["data1", "data2", "data3"]
+        tags = ["tag1", "tag2", "tag3"]
+
+        stream = SyncStreamFromLists(packets, tags)
+        result_stream = custom_pod(stream)
+        result = list(result_stream)
+
+        expected = list(zip(packets, tags))
+        assert result == expected
+
+    def test_pod_chaining(self):
+        """Test chaining pods together."""
+
+        class AddPrefixPod(Pod):
+            def __init__(self, prefix):
+                self.prefix = prefix
+
+            def __call__(self, stream):
+                for packet, tag in stream:
+                    yield f"{self.prefix}_{packet}", tag
+
+        class AddSuffixPod(Pod):
+            def __init__(self, suffix):
+                self.suffix = suffix
+
+            def __call__(self, stream):
+                for packet, tag in stream:
+                    
yield f"{packet}_{self.suffix}", tag + + packets = ["data1", "data2"] + tags = ["tag1", "tag2"] + stream = SyncStreamFromLists(packets, tags) + + # Chain two pods + prefix_pod = AddPrefixPod("PRE") + suffix_pod = AddSuffixPod("SUF") + + intermediate_stream = prefix_pod(stream) + final_stream = suffix_pod(intermediate_stream) + + result = list(final_stream) + + expected = [("PRE_data1_SUF", "tag1"), ("PRE_data2_SUF", "tag2")] + assert result == expected + + def test_pod_error_handling(self): + """Test pod error handling.""" + + class ErrorPod(Pod): + def __call__(self, stream): + for i, (packet, tag) in enumerate(stream): + if i == 1: # Error on second item + raise ValueError("Test error") + yield packet, tag + + packets = ["data1", "data2", "data3"] + tags = ["tag1", "tag2", "tag3"] + stream = SyncStreamFromLists(packets, tags) + + error_pod = ErrorPod() + result_stream = error_pod(stream) + + # Should raise error when processing second item + with pytest.raises(ValueError, match="Test error"): + list(result_stream) + + def test_pod_stateful_processing(self): + """Test pod with stateful processing.""" + + class CounterPod(Pod): + def __init__(self): + self.count = 0 + + def __call__(self, stream): + for packet, tag in stream: + self.count += 1 + yield (packet, self.count), tag + + packets = ["a", "b", "c"] + tags = ["t1", "t2", "t3"] + stream = SyncStreamFromLists(packets, tags) + + counter_pod = CounterPod() + result_stream = counter_pod(stream) + result = list(result_stream) + + expected = [(("a", 1), "t1"), (("b", 2), "t2"), (("c", 3), "t3")] + assert result == expected + + def test_pod_multiple_outputs_per_input(self): + """Test pod that produces multiple outputs per input.""" + + class DuplicatorPod(Pod): + def __call__(self, stream): + for packet, tag in stream: + yield f"{packet}_copy1", f"{tag}_1" + yield f"{packet}_copy2", f"{tag}_2" + + packets = ["data1", "data2"] + tags = ["tag1", "tag2"] + stream = SyncStreamFromLists(packets, tags) + + duplicator_pod = DuplicatorPod() + result_stream = duplicator_pod(stream) + result = list(result_stream) + + expected = [ + ("data1_copy1", "tag1_1"), + ("data1_copy2", "tag1_2"), + ("data2_copy1", "tag2_1"), + ("data2_copy2", "tag2_2"), + ] + assert result == expected + + def test_pod_filtering(self): + """Test pod that filters items.""" + + class FilterPod(Pod): + def __init__(self, predicate): + self.predicate = predicate + + def __call__(self, stream): + for packet, tag in stream: + if self.predicate(packet, tag): + yield packet, tag + + packets = [1, 2, 3, 4, 5] + tags = ["odd", "even", "odd", "even", "odd"] + stream = SyncStreamFromLists(packets, tags) + + # Filter for even numbers + def is_even(packet, tag): + return packet % 2 == 0 + + filter_pod = FilterPod(is_even) + result_stream = filter_pod(stream) + result = list(result_stream) + + expected = [(2, "even"), (4, "even")] + assert result == expected + + def test_pod_transformation(self): + """Test pod that transforms data.""" + + class TransformPod(Pod): + def __init__(self, transform_func): + self.transform_func = transform_func + + def __call__(self, stream): + for packet, tag in stream: + new_packet, new_tag = self.transform_func(packet, tag) + yield new_packet, new_tag + + packets = ["hello", "world"] + tags = ["greeting", "noun"] + stream = SyncStreamFromLists(packets, tags) + + def uppercase_transform(packet, tag): + return packet.upper(), tag.upper() + + transform_pod = TransformPod(uppercase_transform) + result_stream = transform_pod(stream) + result = list(result_stream) + + 
expected = [("HELLO", "GREETING"), ("WORLD", "NOUN")] + assert result == expected + + def test_pod_aggregation(self): + """Test pod that aggregates data.""" + + class SumPod(Pod): + def __call__(self, stream): + total = 0 + count = 0 + for packet, tag in stream: + if isinstance(packet, (int, float)): + total += packet + count += 1 + + if count > 0: + yield total, f"sum_of_{count}_items" + + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + stream = SyncStreamFromLists(packets, tags) + + sum_pod = SumPod() + result_stream = sum_pod(stream) + result = list(result_stream) + + expected = [(15, "sum_of_5_items")] + assert result == expected + + def test_pod_with_complex_data(self): + """Test pod with complex data structures.""" + + class ExtractorPod(Pod): + def __call__(self, stream): + for packet, tag in stream: + if isinstance(packet, dict): + for key, value in packet.items(): + yield value, f"{tag}_{key}" + else: + yield packet, tag + + packets = [{"a": 1, "b": 2}, "simple_string", {"x": 10, "y": 20, "z": 30}] + tags = ["dict1", "str1", "dict2"] + stream = SyncStreamFromLists(packets, tags) + + extractor_pod = ExtractorPod() + result_stream = extractor_pod(stream) + result = list(result_stream) + + # Should extract dict values as separate items + assert len(result) == 6 # 2 + 1 + 3 + assert (1, "dict1_a") in result + assert (2, "dict1_b") in result + assert ("simple_string", "str1") in result + assert (10, "dict2_x") in result + assert (20, "dict2_y") in result + assert (30, "dict2_z") in result diff --git a/tests/test_streams_operations/test_sources/__init__.py b/tests/test_streams_operations/test_sources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_sources/test_glob_source.py b/tests/test_streams_operations/test_sources/test_glob_source.py new file mode 100644 index 0000000..62875a0 --- /dev/null +++ b/tests/test_streams_operations/test_sources/test_glob_source.py @@ -0,0 +1,325 @@ +"""Tests for GlobSource functionality.""" + +import pytest +import os +from pathlib import Path +from orcabridge.sources import GlobSource + + +class TestGlobSource: + """Test cases for GlobSource.""" + + def test_glob_source_basic(self, test_files, temp_dir): + """Test basic glob source functionality.""" + # Create a glob pattern for txt files + pattern = os.path.join(temp_dir, "*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all txt files + txt_files = [f for f in test_files if f.endswith(".txt")] + assert len(result) == len(txt_files) + + # Check that all found files are actual files + for file_content, file_path in result: + assert os.path.isfile(file_path) + assert file_path.endswith(".txt") + assert isinstance(file_content, str) # Text content + + def test_glob_source_specific_pattern(self, test_files, temp_dir): + """Test glob source with specific pattern.""" + # Look for files starting with "file1" + pattern = os.path.join(temp_dir, "file1*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find only file1.txt + assert len(result) == 1 + file_content, file_path = result[0] + assert "file1.txt" in file_path + assert file_content == "Content of file 1" + + def test_glob_source_binary_files(self, test_files, temp_dir): + """Test glob source with binary files.""" + # Look for binary files + pattern = os.path.join(temp_dir, "*.bin") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all 
binary files + bin_files = [f for f in test_files if f.endswith(".bin")] + assert len(result) == len(bin_files) + + for file_content, file_path in result: + assert file_path.endswith(".bin") + assert isinstance(file_content, bytes) # Binary content + + def test_glob_source_json_files(self, test_files, temp_dir): + """Test glob source with JSON files.""" + pattern = os.path.join(temp_dir, "*.json") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all JSON files + json_files = [f for f in test_files if f.endswith(".json")] + assert len(result) == len(json_files) + + for file_content, file_path in result: + assert file_path.endswith(".json") + # Content should be the raw JSON string + assert '"key"' in file_content + + def test_glob_source_no_matches(self, temp_dir): + """Test glob source when pattern matches no files.""" + pattern = os.path.join(temp_dir, "*.nonexistent") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + assert len(result) == 0 + + def test_glob_source_recursive_pattern(self, temp_dir): + """Test glob source with recursive pattern.""" + # Create subdirectory with files + subdir = os.path.join(temp_dir, "subdir") + os.makedirs(subdir, exist_ok=True) + + sub_file = os.path.join(subdir, "sub_file.txt") + with open(sub_file, "w") as f: + f.write("Subdirectory content") + + # Use recursive pattern + pattern = os.path.join(temp_dir, "**", "*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find files in both root and subdirectory + txt_files = [file_path for _, file_path in result] + + # Check that we found files in subdirectory + sub_files = [f for f in txt_files if "subdir" in f] + assert len(sub_files) > 0 + + # Verify content of subdirectory file + sub_result = [ + (content, path) for content, path in result if "sub_file.txt" in path + ] + assert len(sub_result) == 1 + assert sub_result[0][0] == "Subdirectory content" + + def test_glob_source_absolute_vs_relative_paths(self, test_files, temp_dir): + """Test glob source with both absolute and relative paths.""" + # Test with absolute path + abs_pattern = os.path.join(os.path.abspath(temp_dir), "*.txt") + abs_source = GlobSource(abs_pattern) + abs_stream = abs_source() + abs_result = list(abs_stream) + + # Test with relative path (if possible) + current_dir = os.getcwd() + try: + os.chdir(temp_dir) + rel_pattern = "*.txt" + rel_source = GlobSource(rel_pattern) + rel_stream = rel_source() + rel_result = list(rel_stream) + + # Should find the same number of files + assert len(abs_result) == len(rel_result) + + finally: + os.chdir(current_dir) + + def test_glob_source_empty_directory(self, temp_dir): + """Test glob source in empty directory.""" + empty_dir = os.path.join(temp_dir, "empty_subdir") + os.makedirs(empty_dir, exist_ok=True) + + pattern = os.path.join(empty_dir, "*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + assert len(result) == 0 + + def test_glob_source_large_directory(self, temp_dir): + """Test glob source with many files.""" + # Create many files + for i in range(50): + file_path = os.path.join(temp_dir, f"bulk_file_{i:03d}.txt") + with open(file_path, "w") as f: + f.write(f"Content of bulk file {i}") + + pattern = os.path.join(temp_dir, "bulk_file_*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + assert len(result) == 50 + + # Check that files are properly ordered (if implementation sorts) + file_paths = 
[file_path for _, file_path in result] + for i, file_path in enumerate(file_paths): + if "bulk_file_000.txt" in file_path: + # Found the first file, check content + content = [content for content, path in result if path == file_path][0] + assert "Content of bulk file 0" in content + + def test_glob_source_special_characters_in_filenames(self, temp_dir): + """Test glob source with special characters in filenames.""" + # Create files with special characters + special_files = [ + "file with spaces.txt", + "file-with-dashes.txt", + "file_with_underscores.txt", + "file.with.dots.txt", + ] + + for filename in special_files: + file_path = os.path.join(temp_dir, filename) + with open(file_path, "w") as f: + f.write(f"Content of {filename}") + + pattern = os.path.join(temp_dir, "file*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all special files plus any existing test files + found_files = [os.path.basename(file_path) for _, file_path in result] + + for special_file in special_files: + assert special_file in found_files + + def test_glob_source_mixed_file_types(self, test_files, temp_dir): + """Test glob source that matches multiple file types.""" + # Pattern that matches both txt and json files + pattern = os.path.join(temp_dir, "file*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find both text and json files + file_extensions = [os.path.splitext(file_path)[1] for _, file_path in result] + + assert ".txt" in file_extensions + assert ".json" in file_extensions + + def test_glob_source_case_sensitivity(self, temp_dir): + """Test glob source case sensitivity.""" + # Create files with different cases + files = ["Test.TXT", "test.txt", "TEST.txt"] + + for filename in files: + file_path = os.path.join(temp_dir, filename) + with open(file_path, "w") as f: + f.write(f"Content of {filename}") + + # Test exact case match + pattern = os.path.join(temp_dir, "test.txt") + source = GlobSource(pattern) + stream = source() + result = list(stream) + + # Should find at least the exact match + found_files = [os.path.basename(file_path) for _, file_path in result] + assert "test.txt" in found_files + + def test_glob_source_symlinks(self, temp_dir): + """Test glob source with symbolic links (if supported).""" + # Create a regular file + original_file = os.path.join(temp_dir, "original.txt") + with open(original_file, "w") as f: + f.write("Original content") + + try: + # Create a symbolic link + link_file = os.path.join(temp_dir, "link.txt") + os.symlink(original_file, link_file) + + pattern = os.path.join(temp_dir, "*.txt") + source = GlobSource(pattern) + stream = source() + result = list(stream) + + # Should find both original and link + file_paths = [file_path for _, file_path in result] + original_found = any("original.txt" in path for path in file_paths) + link_found = any("link.txt" in path for path in file_paths) + + assert original_found + # Link behavior depends on implementation + + except (OSError, NotImplementedError): + # Symlinks not supported on this system + pass + + def test_glob_source_error_handling(self, temp_dir): + """Test glob source error handling.""" + # Test with invalid pattern + invalid_pattern = "/nonexistent/path/*.txt" + + source = GlobSource(invalid_pattern) + stream = source() + + # Should handle gracefully (empty result or specific error) + try: + result = list(stream) + # If no error, should be empty + assert len(result) == 0 + except (OSError, FileNotFoundError): + # Expected 
error for invalid path + pass + + def test_glob_source_file_permissions(self, temp_dir): + """Test glob source with files of different permissions.""" + # Create a file and try to change permissions + restricted_file = os.path.join(temp_dir, "restricted.txt") + with open(restricted_file, "w") as f: + f.write("Restricted content") + + try: + # Try to make file unreadable + os.chmod(restricted_file, 0o000) + + pattern = os.path.join(temp_dir, "restricted.txt") + source = GlobSource(pattern) + stream = source() + + # Should handle permission errors gracefully + try: + result = list(stream) + # If successful, content might be empty or error + except PermissionError: + # Expected for restricted files + pass + + finally: + # Restore permissions for cleanup + try: + os.chmod(restricted_file, 0o644) + except: + pass diff --git a/tests/test_streams_operations/test_streams/__init__.py b/tests/test_streams_operations/test_streams/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_streams/test_base_classes.py b/tests/test_streams_operations/test_streams/test_base_classes.py new file mode 100644 index 0000000..7eecab3 --- /dev/null +++ b/tests/test_streams_operations/test_streams/test_base_classes.py @@ -0,0 +1,514 @@ +""" +Test module for base Stream and SyncStream classes. + +This module tests the fundamental stream functionality including +iteration, flow operations, labeling, key management, and invocation tracking. +""" + +from collections.abc import Collection, Iterator +import pytest +from unittest.mock import Mock, MagicMock +from abc import ABC + +from orcabridge.base import Stream, SyncStream, Operation, Invocation +from orcabridge.mappers import Join +from orcabridge.streams import SyncStreamFromLists, SyncStreamFromGenerator +from orcabridge.types import Tag, Packet + + +class ConcreteStream(Stream): + """Concrete Stream implementation for testing.""" + + def __init__(self, data: Collection[tuple[Tag, Packet]], label=None): + super().__init__(label=label) + self.data = data + + def __iter__(self): + return iter(self.data) + + +class ConcreteSyncStream(SyncStream): + """Concrete SyncStream implementation for testing.""" + + def __init__(self, data: Collection[tuple[Tag, Packet]], label=None): + super().__init__(label=label) + self.data = data + + def __iter__(self) -> Iterator[tuple[Tag, Packet]]: + return iter(self.data) + + +@pytest.fixture +def sample_stream_data(): + """Sample stream data for testing.""" + return [ + ({"id": 1, "type": "text"}, {"content": "Hello", "size": 5}), + ({"id": 2, "type": "text"}, {"content": "World", "size": 5}), + ({"id": 3, "type": "number"}, {"value": 42, "unit": "count"}), + ] + + +@pytest.fixture +def sample_tags_packets(sample_stream_data): + """Extract tags and packets from sample data.""" + tags, packets = zip(*sample_stream_data) + return list(tags), list(packets) + + +class TestStreamBase: + """Test cases for base Stream class.""" + + def test_stream_labels(self, sample_stream_data): + """Test Stream initialization with and without label.""" + + # Without label + stream = ConcreteStream(sample_stream_data) + assert stream.label == "ConcreteStream", ( + f"Label should default to class name {stream.__class__.__name__} but got {stream.label}" + ) + assert stream.invocation is None + + # With label + labeled_stream = ConcreteStream(sample_stream_data, label="test_stream") + assert labeled_stream.label == "test_stream" + + def test_stream_iteration(self, sample_stream_data): + """Test that Stream can be 
iterated over.""" + stream = ConcreteStream(sample_stream_data) + + result = list(stream) + assert result == sample_stream_data + + # Test multiple iterations + result2 = list(stream) + assert result2 == sample_stream_data + + def test_stream_flow(self, sample_stream_data): + """Test Stream.flow() method.""" + stream = ConcreteStream(sample_stream_data) + + flowed = stream.flow() + assert flowed == sample_stream_data + assert isinstance(flowed, list) + + def test_stream_identity_structure(self, sample_stream_data): + """Test Stream identity structure.""" + stream = ConcreteStream(sample_stream_data) + + # Default identity structure for uninvoked stream should be None + identity = stream.identity_structure() + # TODO: consider alternative behavior for identity structure for streams + assert identity is None + + def test_stream_keys_default(self, sample_stream_data): + """Test Stream keys method default behavior.""" + stream = ConcreteStream(sample_stream_data) + + tag_keys, packet_keys = stream.keys() + # Default implementation will be based on the first sample from the stream + assert tag_keys is not None and set(tag_keys) == set(["id", "type"]) + + assert packet_keys is not None and set(packet_keys) == set(["content", "size"]) + + def test_stream_repr(self, sample_stream_data): + """Test Stream string representation.""" + stream = ConcreteStream(sample_stream_data, label="test_stream") + + repr_str = repr(stream) + assert "ConcreteStream" in repr_str + assert "test_stream" in repr_str + + +class TestSyncStreamBase: + """Test cases for SyncStream base class.""" + + def test_syncstream_initialization(self, sample_stream_data): + """Test SyncStream initialization.""" + sync_stream = ConcreteSyncStream(sample_stream_data) + + assert isinstance(sync_stream, Stream) + assert isinstance(sync_stream, SyncStream) + + def test_syncstream_rshift_operator_dict(self, sample_stream_data): + """Test SyncStream >> operator with dictionary mapping.""" + sync_stream = SyncStreamFromLists(paired=sample_stream_data) + + # Test with dictionary (should use MapPackets) + mapping = {"content": "text", "size": "length"} + mapped_stream = sync_stream >> mapping + + assert isinstance(mapped_stream, SyncStream) + result = list(mapped_stream) + + # Check that mapping was applied + for (tag, packet), (ref_tag, ref_packet) in zip(result, sample_stream_data): + if "content" in ref_packet: + assert "text" in packet + assert packet["text"] == ref_packet["content"] + if "size" in ref_packet: + assert "length" in packet + assert packet["length"] == ref_packet["size"] + + def test_syncstream_rshift_operator_callable(self, sample_stream_data): + """Test SyncStream >> operator with callable transformer.""" + sync_stream = SyncStreamFromLists(paired=sample_stream_data) + + def add_processed_flag(stream): + """Add processed flag to all packets.""" + + def generator(): + for tag, packet in stream: + yield tag, {**packet, "processed": True} + + return SyncStreamFromGenerator(generator) + + transformed = sync_stream >> add_processed_flag + result = list(transformed) + + # Check that all packets have processed flag + for _, packet in result: + assert packet["processed"] is True + + def test_syncstream_mul_operator(self, sample_tags_packets): + """Test SyncStream * operator for joining streams.""" + tags1, packets1 = sample_tags_packets + stream1 = SyncStreamFromLists(tags1[:2], packets1[:2]) + + tags2 = [{"id": 1, "category": "A"}, {"id": 2, "category": "B"}] + packets2 = [{"priority": "high"}, {"priority": "low"}] + stream2 = 
SyncStreamFromLists(tags2, packets2)
+
+        # Test join operation
+        joined = stream1 * stream2
+
+        assert joined.invocation is not None and isinstance(
+            joined.invocation.operation, Join
+        ), (
+            f"* operator should result from a Join invocation but got {type(joined)}"
+        )
+        result = list(joined)
+
+        # Should have joined results where tags match
+        assert len(result) >= 0  # Exact count depends on tag matching logic
+
+    def test_syncstream_mul_operator_type_error(self, sample_tags_packets):
+        """Test SyncStream * operator with invalid type."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        with pytest.raises(TypeError, match="other must be a SyncStream"):
+            sync_stream * "not_a_stream"  # type: ignore
+
+    def test_syncstream_rshift_invalid_type(self, sample_tags_packets):
+        """Test SyncStream >> operator with invalid transformer type."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        # Should handle non-dict, non-callable gracefully or raise appropriate error
+        with pytest.raises((TypeError, AttributeError)):
+            sync_stream >> 123  # type: ignore
+
+    def test_syncstream_chaining_operations(self, sample_tags_packets):
+        """Test chaining multiple SyncStream operations."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        # Chain multiple transformations
+        def add_flag(stream):
+            def generator():
+                for tag, packet in stream:
+                    yield tag, {**packet, "chained": True}
+
+            return SyncStreamFromGenerator(generator)
+
+        def add_counter(stream):
+            def generator():
+                for i, (tag, packet) in enumerate(stream):
+                    yield tag, {**packet, "counter": i}
+
+            return SyncStreamFromGenerator(generator)
+
+        result_stream = sync_stream >> add_flag >> add_counter
+        result = list(result_stream)
+
+        # Check that both transformations were applied
+        for i, (tag, packet) in enumerate(result):
+            assert packet["chained"] is True
+            assert packet["counter"] == i
+
+
+class TestSyncStreamFromLists:
+    """Test cases for SyncStreamFromLists implementation."""
+
+    def test_creation_from_lists(self, sample_tags_packets):
+        """Test SyncStreamFromLists creation."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        assert isinstance(stream, SyncStream)
+        result = list(stream)
+
+        expected = list(zip(tags, packets))
+        assert result == expected
+
+    def test_creation_with_mismatched_lengths(self):
+        """Test SyncStreamFromLists with mismatched tag/packet lengths."""
+        tags = [{"id": "1"}, {"id": "2"}]
+        packets = [{"data": "a"}]  # One less packet
+
+        # If strict (default), should raise a ValueError
+        with pytest.raises(ValueError):
+            stream = SyncStreamFromLists(tags, packets, strict=True)
+
+        # If not strict, should handle gracefully and create based on the shortest length
+        stream = SyncStreamFromLists(tags, packets, strict=False)
+        result = list(stream)
+
+        assert len(result) == 1
+        assert result[0] == ({"id": "1"}, {"data": "a"})
+
+    def test_empty_lists(self):
+        """Test SyncStreamFromLists with empty lists."""
+        stream = SyncStreamFromLists([], [])
+        result = list(stream)
+
+        assert result == []
+
+    def test_keys_inference(self, sample_tags_packets):
+        """Test key inference from tag and packet data."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        tag_keys, packet_keys = stream.keys()
+
+        # Should infer keys from the first element
+        expected_tag_keys = set()
+        expected_packet_keys = set()
+
+        if tags:
expected_tag_keys.update(tags[0].keys()) + if packets: + expected_packet_keys.update(packets[0].keys()) + + assert tag_keys is not None and set(tag_keys) == expected_tag_keys + assert packet_keys is not None and set(packet_keys) == expected_packet_keys + + def test_multiple_iterations(self, sample_tags_packets): + """Test that SyncStreamFromLists can be iterated multiple times.""" + tags, packets = sample_tags_packets + stream = SyncStreamFromLists(tags, packets) + + result1 = list(stream) + result2 = list(stream) + + assert result1 == result2 + assert len(result1) == len(tags) + + +class TestSyncStreamFromGenerator: + """Test cases for SyncStreamFromGenerator implementation.""" + + def test_creation_from_generator(self, sample_stream_data): + """Test SyncStreamFromGenerator creation.""" + + def generator(): + for item in sample_stream_data: + yield item + + stream = SyncStreamFromGenerator(generator) + assert isinstance(stream, SyncStream) + + result = list(stream) + assert result == sample_stream_data + + def test_generator_multiple_iterations(self, sample_stream_data): + """Test that generator-based streams can be iterated multiple times""" + + def generator(): + for item in sample_stream_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # First iteration should work + result1 = list(stream) + assert result1 == sample_stream_data + + # Second iteration should work (new iterator instance) + result2 = list(stream) + assert result2 == sample_stream_data + + def test_empty_generator(self): + """Test SyncStreamFromGenerator with empty generator.""" + + def empty_generator(): + return + yield # This line is never reached + + stream = SyncStreamFromGenerator(empty_generator) + result = list(stream) + + assert result == [] + + def test_generator_with_exception(self): + """Test SyncStreamFromGenerator with generator that raises exception.""" + + def failing_generator(): + yield ({"id": "1"}, {"data": "ok"}) + raise ValueError("Generator failed") + + stream = SyncStreamFromGenerator(failing_generator) + + # Should propagate the exception + with pytest.raises(ValueError, match="Generator failed"): + list(stream) + + def test_lazy_evaluation(self): + """Test that SyncStreamFromGenerator is lazily evaluated.""" + call_count = {"count": 0} + + def counting_generator(): + call_count["count"] += 1 + yield ({"id": "1"}, {"data": "test"}) + + stream = SyncStreamFromGenerator(counting_generator) + + # Generator should not be called until iteration starts + assert call_count["count"] == 0 + + # Start iteration + iterator = iter(stream) + next(iterator) + + # Now generator should have been called + assert call_count["count"] == 1 + + def test_inferred_keys_with_generator(self): + """Test key inference with generator streams.""" + + def sample_generator(): + yield ({"id": "1", "type": "A"}, {"value": "10", "name": "test"}) + yield ({"id": "2", "type": "B"}, {"value": "20", "size": "5"}) + + stream = SyncStreamFromGenerator(sample_generator) + + # Keys should be inferred from generated data + tag_keys, packet_keys = stream.keys() + + # Note: This depends on implementation - may need to consume stream + # to infer keys, or may return None + if tag_keys is not None: + assert "id" in tag_keys + assert "type" in tag_keys + + if packet_keys is not None: + assert "value" in packet_keys + + def test_specified_keys_with_generator(self): + """Test key inference with generator streams.""" + + def sample_generator(): + yield ({"id": "1", "type": "A"}, {"value": "10", "name": "test"}) + yield ({"id": 
"2", "type": "B"}, {"value": "20", "size": "5"}) + + # Specify keys explicitly -- it need not match the actual content + stream = SyncStreamFromGenerator( + sample_generator, tag_keys=["id"], packet_keys=["group"] + ) + + # Keys should be based on what was specified at the construction + tag_keys, packet_keys = stream.keys() + + # Note: This depends on implementation - may need to consume stream + # to infer keys, or may return None + if tag_keys is not None: + assert "id" in tag_keys + assert "type" not in tag_keys + + if packet_keys is not None: + assert "value" not in packet_keys + assert "group" in packet_keys + + +class TestStreamIntegration: + """Integration tests for stream functionality.""" + + def test_stream_composition(self, sample_tags_packets): + """Test composing different stream types.""" + tags, packets = sample_tags_packets + + # Create streams from different sources + list_stream = SyncStreamFromLists(tags[:2], packets[:2]) + + def gen_func(): + yield tags[2], packets[2] + + gen_stream = SyncStreamFromGenerator(gen_func) + + # Both should work similarly + list_result = list(list_stream) + gen_result = list(gen_stream) + + assert len(list_result) == 2 + assert len(gen_result) == 1 + + # Combine results + all_data = list_result + gen_result + assert len(all_data) == 3 + + def test_stream_with_complex_data(self): + """Test streams with complex nested data.""" + complex_tags = [ + {"id": 1, "metadata": {"type": "nested", "level": 1}}, + {"id": 2, "metadata": {"type": "nested", "level": 2}}, + ] + complex_packets = [ + {"data": {"values": [1, 2, 3], "config": {"enabled": True}}}, + {"data": {"values": [4, 5, 6], "config": {"enabled": False}}}, + ] + + stream = SyncStreamFromLists(complex_tags, complex_packets) + result = list(stream) + + assert len(result) == 2 + + # Verify complex data is preserved + tag, packet = result[0] + assert tag["metadata"]["type"] == "nested" + assert packet["data"]["values"] == [1, 2, 3] + assert packet["data"]["config"]["enabled"] is True + + def test_stream_memory_efficiency(self): + """Test that generator streams don't consume excessive memory.""" + + def large_generator(): + for i in range(1000): + yield ({"id": i}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(large_generator) + + # Process in chunks to test memory efficiency + count = 0 + for tag, packet in stream: + count += 1 + if count > 10: # Just test first few items + break + + assert count == 11 # Processed 11 items + + def test_stream_error_propagation(self, sample_tags_packets): + """Test that errors in stream data are properly propagated.""" + tags, packets = sample_tags_packets + + # Create stream with invalid data + invalid_tags = tags + [None] # Add invalid tag + invalid_packets = packets + [{"data": "valid"}] + + stream = SyncStreamFromLists(invalid_tags, invalid_packets) + + # Should handle None tags gracefully or raise appropriate error + result = list(stream) + + # The None tag should be included as-is + assert len(result) == 4 + assert result[-1] == (None, {"data": "valid"}) diff --git a/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py b/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py new file mode 100644 index 0000000..3b64887 --- /dev/null +++ b/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py @@ -0,0 +1,578 @@ +""" +Test module for SyncStream concrete implementations. 
+
+This module tests the specific implementations of SyncStream including
+SyncStreamFromLists and SyncStreamFromGenerator, focusing on their unique
+behaviors, performance characteristics, and edge cases.
+"""
+
+import pytest
+from unittest.mock import Mock, patch
+import gc
+
+from orcabridge.streams import SyncStreamFromLists, SyncStreamFromGenerator
+from orcabridge.base import SyncStream
+
+
+@pytest.fixture
+def sample_data():
+    """Sample data for stream testing."""
+    return [
+        ({"id": 1, "type": "doc"}, {"content": "Hello", "size": 5}),
+        ({"id": 2, "type": "doc"}, {"content": "World", "size": 5}),
+        ({"id": 3, "type": "img"}, {"pixels": 1920 * 1080, "format": "png"}),
+    ]
+
+
+@pytest.fixture
+def sample_tags_packets(sample_data):
+    """Extract tags and packets separately."""
+    tags, packets = zip(*sample_data)
+    return list(tags), list(packets)
+
+
+class TestSyncStreamFromLists:
+    """Comprehensive tests for SyncStreamFromLists implementation."""
+
+    def test_basic_creation_and_iteration(self, sample_tags_packets):
+        """Test basic creation and iteration functionality."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        # Test basic properties
+        assert isinstance(stream, SyncStream)
+
+        # Test iteration
+        result = list(stream)
+        expected = list(zip(tags, packets))
+        assert result == expected
+
+    def test_creation_with_empty_lists(self):
+        """Test creation with empty tag and packet lists."""
+        stream = SyncStreamFromLists([], [])
+
+        result = list(stream)
+        assert result == []
+
+        # Test keys with empty stream
+        tag_keys, packet_keys = stream.keys()
+        assert tag_keys == []
+        assert packet_keys == []
+
+    def test_creation_with_single_item(self):
+        """Test creation with single tag-packet pair."""
+        tags = [{"id": 1}]
+        packets = [{"data": "test"}]
+        stream = SyncStreamFromLists(tags, packets)
+
+        result = list(stream)
+        assert result == [({"id": 1}, {"data": "test"})]
+
+    def test_mismatched_list_lengths(self):
+        """Test behavior with different length tag and packet lists."""
+        tags = [{"id": 1}, {"id": 2}, {"id": 3}]
+        packets = [{"data": "a"}, {"data": "b"}]  # Shorter list
+
+        stream = SyncStreamFromLists(tags, packets, strict=False)
+        result = list(stream)
+
+        # Should zip to shortest length
+        assert len(result) == 2
+        assert result == [
+            ({"id": 1}, {"data": "a"}),
+            ({"id": 2}, {"data": "b"}),
+        ]
+
+    def test_keys_inference_comprehensive(self):
+        """Test comprehensive key inference from data."""
+        tags = [
+            {"id": 1, "type": "A", "category": "test"},
+            {"id": 2, "type": "B"},  # Missing category
+            {"id": 3, "category": "prod", "extra": "value"},  # Missing type, has extra
+        ]
+        packets = [
+            {"data": "hello", "size": 5, "meta": {"info": "test"}},
+            {"data": "world", "count": 10},  # Missing size, meta; has count
+            {"size": 3, "format": "json"},  # Missing data; has format
+        ]
+
+        stream = SyncStreamFromLists(tags, packets)
+        tag_keys, packet_keys = stream.keys()
+
+        # Should include all keys found across all items
+        expected_tag_keys = {"id", "type", "category", "extra"}
+        expected_packet_keys = {"data", "size", "meta", "count", "format"}
+
+        assert set(tag_keys) == expected_tag_keys
+        assert set(packet_keys) == expected_packet_keys
+
+    def test_multiple_iterations_consistency(self, sample_tags_packets):
+        """Test that multiple iterations return consistent results."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        # Multiple iterations should be identical
+        result1 = list(stream)
+        result2 = list(stream)
+        result3 = 
list(stream) + + assert result1 == result2 == result3 + assert len(result1) == len(tags) + + def test_iteration_with_generators_as_input(self): + """Test creation with generator inputs (should work since converted to lists).""" + + def tag_gen(): + for i in range(3): + yield {"id": i} + + def packet_gen(): + for i in range(3): + yield {"value": i * 10} + + # Should accept generators and convert them + stream = SyncStreamFromLists(list(tag_gen()), list(packet_gen())) + result = list(stream) + + assert len(result) == 3 + assert result[0] == ({"id": 0}, {"value": 0}) + assert result[1] == ({"id": 1}, {"value": 10}) + assert result[2] == ({"id": 2}, {"value": 20}) + + def test_memory_efficiency_large_lists(self): + """Test memory efficiency with large lists.""" + # Create large but not excessive lists + size = 1000 + tags = [{"id": i} for i in range(size)] + packets = [{"value": i * 2} for i in range(size)] + + stream = SyncStreamFromLists(tags, packets) + + # Should be able to iterate without memory issues + count = 0 + for tag, packet in stream: + count += 1 + assert tag["id"] == packet["value"] // 2 + + assert count == size + + def test_data_types_preservation(self): + """Test that various data types are preserved correctly.""" + tags = [ + {"int": 42, "float": 3.14, "str": "hello"}, + {"bool": True, "none": None, "list": [1, 2, 3]}, + {"dict": {"nested": "value"}, "tuple": (1, 2)}, + ] + packets = [ + {"complex": 1 + 2j, "bytes": b"binary", "set": {1, 2, 3}}, + {"lambda": lambda x: x * 2}, # Function objects + {"custom": {"deep": {"nesting": {"value": 123}}}}, + ] + + stream = SyncStreamFromLists(tags, packets) + result = list(stream) + + # Verify data type preservation + assert result[0][0]["int"] == 42 + assert result[0][0]["float"] == 3.14 + assert result[0][1]["complex"] == 1 + 2j + assert result[0][1]["bytes"] == b"binary" + + assert result[1][0]["bool"] is True + assert result[1][0]["none"] is None + assert callable(result[1][1]["lambda"]) + + assert result[2][0]["dict"]["nested"] == "value" + assert result[2][1]["custom"]["deep"]["nesting"]["value"] == 123 + + def test_mutable_data_safety(self): + """Test that mutable data doesn't cause unexpected sharing.""" + shared_dict = {"shared": "value"} + tags = [{"ref": shared_dict}, {"ref": shared_dict}] + packets = [{"data": "a"}, {"data": "b"}] + + stream = SyncStreamFromLists(tags, packets) + result = list(stream) + + # Modify the shared dict + shared_dict["shared"] = "modified" + + # The stream results should reflect the change (references preserved) + assert result[0][0]["ref"]["shared"] == "modified" + assert result[1][0]["ref"]["shared"] == "modified" + + def test_label_and_metadata(self, sample_tags_packets): + """Test stream labeling and metadata handling.""" + tags, packets = sample_tags_packets + + # Test with custom label + stream = SyncStreamFromLists(tags, packets, label="test_stream") + assert stream.label == "test_stream" + + # Test default label generation + stream_auto = SyncStreamFromLists(tags, packets) + assert "SyncStreamFromLists_" in stream_auto.label + + +class TestSyncStreamFromGenerator: + """Comprehensive tests for SyncStreamFromGenerator implementation.""" + + def test_basic_creation_and_iteration(self, sample_data): + """Test basic creation and iteration functionality.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + assert isinstance(stream, SyncStream) + + result = list(stream) + assert result == sample_data + + def test_empty_generator(self): 
+ """Test with generator that yields nothing.""" + + def empty_gen(): + return + yield # Never reached + + stream = SyncStreamFromGenerator(empty_gen) + result = list(stream) + assert result == [] + + def test_single_item_generator(self): + """Test with generator that yields single item.""" + + def single_gen(): + yield ({"id": 1}, {"data": "test"}) + + stream = SyncStreamFromGenerator(single_gen) + result = list(stream) + assert result == [({"id": 1}, {"data": "test"})] + + def test_generator_exhaustion(self, sample_data): + """Test that generators are exhausted after iteration.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # First iteration consumes generator + result1 = list(stream) + assert result1 == sample_data + + # Second iteration gets empty results (generator exhausted) + result2 = list(stream) + assert result2 == [] + + def test_lazy_evaluation(self): + """Test that generator evaluation is lazy.""" + call_log = [] + + def tracking_generator(): + call_log.append("generator_started") + for i in range(3): + call_log.append(f"yielding_{i}") + yield ({"id": i}, {"value": i * 10}) + call_log.append("generator_finished") + + stream = SyncStreamFromGenerator(tracking_generator) + + # Generator should not have started yet + assert call_log == [] + + # Start iteration but don't consume everything + iterator = iter(stream) + next(iterator) + + # Should have started and yielded first item + assert "generator_started" in call_log + assert "yielding_0" in call_log + assert "yielding_1" not in call_log + + def test_generator_with_exception(self): + """Test generator that raises exception during iteration.""" + + def failing_generator(): + yield ({"id": 1}, {"data": "ok"}) + yield ({"id": 2}, {"data": "ok"}) + raise ValueError("Something went wrong") + yield ({"id": 3}, {"data": "never_reached"}) + + stream = SyncStreamFromGenerator(failing_generator) + + # Should propagate exception + with pytest.raises(ValueError, match="Something went wrong"): + list(stream) + + def test_generator_partial_consumption(self, sample_data): + """Test partial consumption of generator.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # Consume only part of the stream + iterator = iter(stream) + first_item = next(iterator) + second_item = next(iterator) + + assert first_item == sample_data[0] + assert second_item == sample_data[1] + + # Rest of generator should still be available + remaining = list(iterator) + assert remaining == sample_data[2:] + + def test_generator_with_infinite_sequence(self): + """Test generator with infinite sequence (partial consumption).""" + + def infinite_generator(): + i = 0 + while True: + yield ({"id": i}, {"value": i * i}) + i += 1 + + stream = SyncStreamFromGenerator(infinite_generator) + + # Consume just first few items + iterator = iter(stream) + results = [] + for _ in range(5): + results.append(next(iterator)) + + assert len(results) == 5 + assert results[0] == ({"id": 0}, {"value": 0}) + assert results[4] == ({"id": 4}, {"value": 16}) + + def test_generator_with_complex_logic(self): + """Test generator with complex internal logic.""" + + def complex_generator(): + # Generator with state and complex logic + state = {"count": 0, "filter_odd": True} + + for i in range(10): + state["count"] += 1 + + if state["filter_odd"] and i % 2 == 1: + continue # Skip odd numbers initially + + if i == 6: # Change behavior mid-stream + state["filter_odd"] = 
False + + yield ({"id": i, "count": state["count"]}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(complex_generator) + result = list(stream) + + # Should have skipped odds initially, then included them + ids = [item[0]["id"] for item in result] + assert 0 in ids and 2 in ids and 4 in ids and 6 in ids # Evens + assert 1 not in ids and 3 not in ids and 5 not in ids # Early odds skipped + assert 7 in ids and 8 in ids and 9 in ids # Later odds included + + def test_keys_inference_limitation(self): + """Test that key inference may be limited for generators.""" + + def generator(): + yield ({"id": 1, "type": "A"}, {"data": "hello", "size": 5}) + yield ({"id": 2, "type": "B"}, {"data": "world", "count": 10}) + + stream = SyncStreamFromGenerator(generator) + + # Keys might not be available without consuming stream + tag_keys, packet_keys = stream.keys() + + # Implementation-dependent: might be None or inferred + if tag_keys is not None: + assert isinstance(tag_keys, (list, tuple, set)) + if packet_keys is not None: + assert isinstance(packet_keys, (list, tuple, set)) + + def test_memory_efficiency(self): + """Test memory efficiency of generator streams.""" + + def memory_efficient_generator(): + # Generate large number of items without storing them all + for i in range(10000): + yield ({"id": i}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(memory_efficient_generator) + + # Process in chunks to verify memory efficiency + count = 0 + for tag, packet in stream: + count += 1 + assert tag["id"] == packet["value"] // 2 + + if count >= 100: # Don't process all 10k items in test + break + + assert count == 100 + + def test_generator_function_vs_generator_object(self, sample_data): + """Test creation with generator function vs generator object.""" + + def gen_function(): + for item in sample_data: + yield item + + # Test with generator function (should work) + stream1 = SyncStreamFromGenerator(gen_function) + result1 = list(stream1) + + # Test with generator object (should work) + gen_object = gen_function() + stream2 = SyncStreamFromGenerator(lambda: gen_object) + result2 = list(stream2) + + assert result1 == sample_data + assert result2 == sample_data + + def test_label_and_metadata(self, sample_data): + """Test stream labeling and metadata handling.""" + + def generator(): + for item in sample_data: + yield item + + # Test with custom label + stream = SyncStreamFromGenerator(generator, label="test_gen_stream") + assert stream.label == "test_gen_stream" + + # Test default label generation + stream_auto = SyncStreamFromGenerator(generator) + assert "SyncStreamFromGenerator_" in stream_auto.label + + +class TestStreamImplementationComparison: + """Tests comparing different stream implementations.""" + + def test_equivalent_output(self, sample_data): + """Test that both implementations produce equivalent output for same data.""" + tags, packets = zip(*sample_data) + + # Create streams from same data using different implementations + list_stream = SyncStreamFromLists(list(tags), list(packets)) + + def generator(): + for item in sample_data: + yield item + + gen_stream = SyncStreamFromGenerator(generator) + + # Results should be identical + list_result = list(list_stream) + gen_result = list(gen_stream) + + assert list_result == gen_result == sample_data + + def test_multiple_iteration_behavior(self, sample_data): + """Test different behavior in multiple iterations.""" + tags, packets = zip(*sample_data) + + list_stream = SyncStreamFromLists(list(tags), list(packets)) + + def generator(): 
+            for item in sample_data:
+                yield item
+
+        gen_stream = SyncStreamFromGenerator(generator)
+
+        # List stream should support multiple iterations
+        list_result1 = list(list_stream)
+        list_result2 = list(list_stream)
+        assert list_result1 == list_result2
+
+        # Generator stream should only work once
+        gen_result1 = list(gen_stream)
+        gen_result2 = list(gen_stream)
+        assert gen_result1 == sample_data
+        assert gen_result2 == []  # Exhausted
+
+    def test_performance_characteristics(self):
+        """Test performance characteristics of different implementations."""
+        import time
+
+        size = 1000
+        tags = [{"id": i} for i in range(size)]
+        packets = [{"value": i * 2} for i in range(size)]
+
+        # Time list-based stream creation and consumption
+        start = time.time()
+        list_stream = SyncStreamFromLists(tags, packets)
+        list_result = list(list_stream)
+        list_time = time.time() - start
+
+        # Time generator-based stream creation and consumption
+        def generator():
+            for tag, packet in zip(tags, packets):
+                yield tag, packet
+
+        start = time.time()
+        gen_stream = SyncStreamFromGenerator(generator)
+        gen_result = list(gen_stream)
+        gen_time = time.time() - start
+
+        # Results should be equivalent
+        assert list_result == gen_result
+
+        # Both should complete in reasonable time (implementation dependent)
+        assert list_time < 1.0  # Should be fast
+        assert gen_time < 1.0  # Should be fast
+
+    def test_error_handling_consistency(self):
+        """Test that error handling is consistent between implementations."""
+
+        def failing_generator():
+            yield ({"id": 1}, {"data": "ok"})
+            raise RuntimeError("Generator error")
+
+        # Generator stream should propagate error
+        gen_stream = SyncStreamFromGenerator(failing_generator)
+        with pytest.raises(RuntimeError, match="Generator error"):
+            list(gen_stream)
+
+        # List stream with problematic data
+        tags = [{"id": 1}, None]  # None tag might cause issues
+        packets = [{"data": "ok"}, {"data": "also_ok"}]
+
+        list_stream = SyncStreamFromLists(tags, packets)
+        result = list(list_stream)  # Should handle None gracefully
+
+        assert len(result) == 2
+        assert result[1] == (None, {"data": "also_ok"})
+
+    def test_integration_with_operations(self, sample_data):
+        """Test that both stream types work equivalently with operations."""
+        from orcabridge.mappers import Filter
+
+        tags, packets = zip(*sample_data)
+
+        # Create equivalent streams
+        list_stream = SyncStreamFromLists(list(tags), list(packets))
+
+        def generator():
+            for item in sample_data:
+                yield item
+
+        gen_stream = SyncStreamFromGenerator(generator)
+
+        # Apply same operation to both
+        filter_op = Filter(lambda tag, packet: tag["id"] > 1)
+
+        filtered_list = filter_op(list_stream)
+        filtered_gen = filter_op(gen_stream)
+
+        list_result = list(filtered_list)
+        gen_result = list(filtered_gen)
+
+        # Results should be equivalent
+        assert list_result == gen_result
+        assert len(list_result) == 2  # Should have filtered out id=1
diff --git a/uv.lock b/uv.lock
index 23ca96e..26e122a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,10 +2,20 @@ version = 1
 revision = 2
 requires-python = ">=3.10"
 resolution-markers = [
-    "python_full_version >= '3.11'",
+    "python_full_version >= '3.12'",
+    "python_full_version == '3.11.*'",
     "python_full_version < '3.11'",
 ]
 
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = 
"sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "appnope" version = "0.1.4" @@ -15,6 +25,80 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321, upload-time = "2024-02-06T09:43:09.663Z" }, ] +[[package]] +name = "arro3-core" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/eb/2a166478dfc951958bf4cd33891bfa346ab9c53c3a87f5ffe99dbe981577/arro3_core-0.5.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a85c4d78fb4a3e3b216b01e44ac16121a06e80169555cd0f7b8fcf038a6c14b3", size = 2448695, upload-time = "2025-05-31T23:17:55.526Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c0/2b1719accd4cc2f81bd36ad79a16750a63e0d7a5132e43115b586d52e21d/arro3_core-0.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2dd7a3b78c8936407e4eebbbe3134410d1be0c51fb697a8b8a5c8118690190a9", size = 2155415, upload-time = "2025-05-31T23:17:57.992Z" }, + { url = "https://files.pythonhosted.org/packages/9c/dc/6bcb859c4a83fff95b2ccc906c027db1f0396610a57bafc90bd933dcce83/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fae76973505d64cebf26a30c78d37a5a1fe012b3d6a6c682fea33ebd1dfc4d99", size = 2594341, upload-time = "2025-05-31T23:18:01.536Z" }, + { url = "https://files.pythonhosted.org/packages/6f/48/109cf08ca7532636d4c356a421e1620e7b01fb6882e12b6afbfa4b933c38/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c65d5ffb89cf9bcc62bb7f64beb049877ca03b504841ffc3cab6e853a13637c", size = 2637344, upload-time = "2025-05-31T23:18:05.307Z" }, + { url = "https://files.pythonhosted.org/packages/b2/4b/5a9dfc81195c8fcf2f99f9cb8f3d8c23ca9da541964d44e409a01ab06d3b/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad9e3e69a0888bf1cd2c9cf2e7d60787ac9bf3b9508937bfb6ff55aba9a6b56b", size = 2878497, upload-time = "2025-05-31T23:18:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/f1/26/a2a0685f3648afb20bbe4920cee6dc8a29b9942fa8c0190f6a8fc3ad4ef3/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36484d31141691c48d6e48f1c387d3b19fe5a814ffcde26b2ac04ebe68f81c76", size = 2540359, upload-time = "2025-05-31T23:18:12.092Z" }, + { url = "https://files.pythonhosted.org/packages/64/40/6b22f0f094d905d610945a9b7d4662d5f143f6638c37e89fb888443aee64/arro3_core-0.5.1-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:78942ee33f55758ce0138b30377185f2d93b9221fb5c239075b56159b3e3fb5b", size = 2289699, upload-time = "2025-05-31T23:18:15.895Z" }, + { url = "https://files.pythonhosted.org/packages/cf/46/eebe9826aeca54bc04bf8ed6e9506134dcf1d02a960482b0164a98d51800/arro3_core-0.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:063b9ffe896dbd01649eb46d04b55f19eb6bc7fa505d1781d64308e13a2510cc", size = 2723968, upload-time = "2025-05-31T23:18:19.597Z" }, + { url = "https://files.pythonhosted.org/packages/90/bc/5c2361010692854efb47211e15eeeb9cef02eb037dbb95b9dd68b4554ba7/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a818344b61b59c09c3f6550c03e5b01678535160b35d38eaa5f988667df69187", size = 2435669, upload-time = "2025-05-31T23:18:22.649Z" }, + { url = "https://files.pythonhosted.org/packages/39/0d/1fef7dcca81696bdea0e79971155b114fb3fb204f177eed25a07f856f57a/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:60fa11fe05f3b48e7b37c1d4f12d94ef678514d2e908033ac30d10d04b1bd957", size = 2869358, upload-time = "2025-05-31T23:18:27.008Z" }, + { url = "https://files.pythonhosted.org/packages/cc/02/1196e7f795658a5ef7c4b5811fe84845025e7baf391d05be36e763336156/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8d1ce524ca27598154f84cf980c6fa4baf0c1379584de2e922e88905dfb939dd", size = 2797000, upload-time = "2025-05-31T23:18:30.694Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ea/31bc0bc32ad3e22a937c866b685e0b1123f4747dabc23703531d7626a5d2/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2546df75769b60bbd74aa7a169cd538e909aabf2200a99edfdda542e560b5c11", size = 2709346, upload-time = "2025-05-31T23:18:34.125Z" }, + { url = "https://files.pythonhosted.org/packages/fb/2c/6bfb3a4cd26b1fed099767e124063f0b4fe5e7f0cab0160004ba5900cad0/arro3_core-0.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:d89350dc36f58c9c0fb941fbcd46e2e00f76f3438844ef3dce2419ce64631739", size = 2611596, upload-time = "2025-05-31T23:18:37.826Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/c2540f04330f52f431a0ca0824c15d86fc38dd8b3f2af027a41a90ea91e7/arro3_core-0.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:e6c43f2f59cd43044663969031c4ef29aab76247b5bda74800187a8b9bda3b9e", size = 2448953, upload-time = "2025-05-31T23:18:40.996Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8f/9fc60dcc201f72f3d9d2ca86b61ff374eb640b58a65660b8de2ac53654d6/arro3_core-0.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:006214e68df6f66bbd1712989258cac2b307085627962348749cc2802b843f25", size = 2155535, upload-time = "2025-05-31T23:18:44.178Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9e/4e6a3c41b52b08b8f34f7830df2a0e499d3e4ab43c6d45984e2af13fa780/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:be77d366d43025599a5a0c520cced43c181f750cf6bcc174a72a97a7338f9e37", size = 2594752, upload-time = "2025-05-31T23:18:47.586Z" }, + { url = "https://files.pythonhosted.org/packages/bd/77/94d8099c8fbfe3489ec92da76f65844b276f82b18d9cb6a547a717bd38cc/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca7cba980b3d2e3552dd06da67c8c298d970bd9430ed661a2316c893bfca3873", size = 2637291, upload-time = "2025-05-31T23:18:50.539Z" }, + { url = "https://files.pythonhosted.org/packages/ff/22/050c75161bcbe2e6b3ff5f8de11f760a376523fa905f4787b09bab65a4b5/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1866f014ca091049692d81601760b65fdad7b779d9c73698f709cd6ee4e8b5c3", size = 2869405, upload-time = "2025-05-31T23:18:53.73Z" }, + { url = "https://files.pythonhosted.org/packages/ac/88/87a3293db47dab5b23ecd910532f02c56d15f52920fc5d72404935126345/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2e1433e98b4385f2565c59d69c1bbb4f18da7d2693d2d9712e219e020e8f9025", size = 2540544, upload-time = "2025-05-31T23:18:56.954Z" }, + { url = "https://files.pythonhosted.org/packages/71/e8/f85ce3be71c967b24e96c3af589ae3390548ab0d9fd69d5ed535225fd620/arro3_core-0.5.1-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:afba61734d4fc772ddf26888c299f94157e530a080835a981431a37398168fd6", size = 2289505, upload-time = "2025-05-31T23:19:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/9c/4b/432eb5135fbcc5d8770ad7bd4193545e97588caf1f690d4f724bbb927632/arro3_core-0.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:69b8885acf0e94b54adb6f060b4c41ee138d171b37a6356b690bece6b911565d", size = 2724357, upload-time = "2025-05-31T23:19:04.201Z" }, + { url = "https://files.pythonhosted.org/packages/83/91/056ab3166c5e562eab66477f573aff02bb4b92ba0de8affffd1bace6e50c/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2fe8f6d43697719abf822f9f02df7547681669c092b41bcee2b3a689f99e1588", size = 2435801, upload-time = "2025-05-31T23:19:07.617Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5f/b7a6a2106ba508e20f9788bb53c71b56211defd3729c7bcfe6ec09d36fd1/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a2aa298a78135d993e9257f110ac140e008d7bdc11eb23d8bc1c02493afbdf5a", size = 2869804, upload-time = "2025-05-31T23:19:11.059Z" }, + { url = "https://files.pythonhosted.org/packages/f6/e3/d95fbff21b27b06faa892c65621ea429391d0bfb926cdeb557db2d452a33/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:508688336dfc4667f8571115924857ae4629044ebeb4d3dedeabc33e287b2bca", size = 2797201, upload-time = "2025-05-31T23:19:14.674Z" }, + { url = "https://files.pythonhosted.org/packages/45/07/7ab65b01110e9459db2f2d37972826aa31a367ee98e95c7664f0eb13963d/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31463bda8a942f5ae0e4a06c8fbe2424367b820d93f6f3b82c6f775f9a966780", size = 2709306, upload-time = "2025-05-31T23:19:17.913Z" }, + { url = "https://files.pythonhosted.org/packages/a7/15/0bebe279425bb70bd0a712dd45dcb4418deb9f32057ff5b9efd7947a65d3/arro3_core-0.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:0223d878f5f23c17600cab853cecce963c38fe365efa5f157f016706314018f1", size = 2611539, upload-time = "2025-05-31T23:19:21.358Z" }, + { url = "https://files.pythonhosted.org/packages/c9/9c/af3c6127548630beaa319746770265b2fb996bb3e6dba8d16f78910bc070/arro3_core-0.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:afccbaf951a84d6eafb4384692ea557ad06887c6db8825e9417647f805735936", size = 2438592, upload-time = "2025-05-31T23:19:24.494Z" }, + { url = "https://files.pythonhosted.org/packages/d8/50/057c93a846bbc5e5e55a976ea4fc00255332f64e5f9b1abfc218bb184f48/arro3_core-0.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37325ec2f47a4dce40fa871935000708b545f3981c8e2bde7d7a031f2e098865", size = 2145488, upload-time = "2025-05-31T23:19:27.886Z" }, + { url = "https://files.pythonhosted.org/packages/1f/8c/cbb785ecb9a0df254f5a761fc5ac7c8c5a6f93b0116e994ecf2797984f80/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:63ac803d46127d8c01bc4ffbb5911f10e51c063c9bcc76ba0258378bda683383", size = 2592145, upload-time = "2025-05-31T23:19:31.499Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ee/405d2bdb88a97f03fb64f2cb655274f58439f8aa6e3cf9d2034581899edb/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:dbaf8ccce7637631ed5dc7d53b58aaa0f8c7e935b772ff10a31c9cee571b799a", size = 2637122, upload-time = "2025-05-31T23:19:34.918Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0c/9f611398d63b686ea990d6dcf88a98ec7bc66a78d12c27829f80bf8696bc/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b76a47e326311dea3b8ff302ec0d2741d85a7736be472e39314a87569e4552c", size = 2876345, upload-time = "2025-05-31T23:19:38.3Z" }, + { url = "https://files.pythonhosted.org/packages/d8/57/1bc7bd889c65d190a7ce609a720b16f0280e84da87f1c408c34fd099ecaf/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4331989dbbeb6c47db6ee4502df940e90a04b1066bc4d044b9f3c273eb5a0aeb", size = 2537679, upload-time = "2025-05-31T23:19:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/ef/29/9a692f67bdcf2bb07a2635d526ee8751db0676fdb6074d2eee64918ec7db/arro3_core-0.5.1-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:b9ec0d1e4ffe4cc831dfe67fcb0ca9b263743ca56f47bd433ee53af1993687a9", size = 2286635, upload-time = "2025-05-31T23:19:45.716Z" }, + { url = "https://files.pythonhosted.org/packages/2f/23/b37f5eb6db22d02e0c23b502e4d29d85cb8483706feb76a1a2b5b33498f7/arro3_core-0.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce7009d5b24c21b641d9e254d81ff7e3f89f8bc20100d4f56e36211ccc72a897", size = 2722295, upload-time = "2025-05-31T23:19:49.461Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bf/7cc411fbbf78049c0c3395c5757f51df569dee1f20d212a9822ead974315/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:924ce175770c6dc351ff9482a716de6d54afff817d340bea1902f60c9c8edec8", size = 2431978, upload-time = "2025-05-31T23:19:52.724Z" }, + { url = "https://files.pythonhosted.org/packages/73/0a/52d132ca671739f6c82529a4cc75d4872b9ca0a52dce4b8e7c930af5adf1/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:aa6a8c4bd99f846fcd7f593f1177c0cf248f6c447c9f612cf336b6426ab53429", size = 2866622, upload-time = "2025-05-31T23:19:56.917Z" }, + { url = "https://files.pythonhosted.org/packages/df/7d/925aa386f08a4f0e6b1f54625c8b7536fb3c6f1335377c553fc16b330e75/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7cbec3c4422fbfc41efb68def5020377134ff64cda9ce6f302ed93a600765906", size = 2793027, upload-time = "2025-05-31T23:20:00.373Z" }, + { url = "https://files.pythonhosted.org/packages/4e/0c/dd4a90153fefa49829b20358c6f23b7d33c2613b6e05f2956e4775a9e0a1/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3485b3f00366407960f30b4b5032fdfeea87cdb103b493c38ad0592534ba9f0b", size = 2706493, upload-time = "2025-05-31T23:20:03.829Z" }, + { url = "https://files.pythonhosted.org/packages/45/54/f6aafaef5388fe260e4bc02d00442e8bf2f9966637b2ddcb1661d8366c59/arro3_core-0.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:36ccfc7316f5aa534ee0d647720932bf6c18546e55034dadea625a9bb84c9baf", size = 2612703, upload-time = "2025-05-31T23:20:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3f/52336dca7f4784b778d458f7071e5746db33825cb57509fd35196522e5df/arro3_core-0.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:bb7fba3c4324db78615b5440ac51f46022ce7674489d96f8916491c117102e47", size = 2438140, upload-time = "2025-05-31T23:20:11.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/02/32d2c8fa81b33e587b9b6be0a71a0e46523f50f1b20d1903b0fb3f1d9cad/arro3_core-0.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:39bad825cb042f22be5f5ab019844541398a3393d154e3675013b4ebb825b3b9", size = 2145410, upload-time = "2025-05-31T23:20:14.919Z" }, + { url = "https://files.pythonhosted.org/packages/d1/72/4632d4240f2d10de16050314263932c80a7bfabab22688e3dcdc1505a0d6/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6520a6cc6e22fe2f8064dc8e4f93961e05fb9a486c921f71a5ef49843c27d24", size = 2591203, upload-time = "2025-05-31T23:20:18.219Z" }, + { url = "https://files.pythonhosted.org/packages/03/c6/8fd3fcf7a1ccfaeb62827457785293a5ad1a8bf44623903d7e5d99212cb5/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63e9e96c034177721b8d5af36d4deff3e93411a24b009b4565e08711cddbbc75", size = 2636665, upload-time = "2025-05-31T23:20:21.805Z" }, + { url = "https://files.pythonhosted.org/packages/6a/84/f5df7ed0eeb1fdaa3cd4d19fb829dca791c3b5108e5f5350a50ff34da914/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:639083eb1712bd39540761a25ab786ba9cb51e0710bb77b21499a2914ba076d4", size = 2883496, upload-time = "2025-05-31T23:20:25.701Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e1/6ab0dd6f362f95ef855d2ba7aacf55c9dd08c55a3d8c5339eafa20f3e0f3/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4876a3c34bd54d970c498e2f61bfb7e36306934fd6acbfa5de497f093972bf0", size = 2536753, upload-time = "2025-05-31T23:20:29.237Z" }, + { url = "https://files.pythonhosted.org/packages/53/20/b0d9bd9b6ccac1c53abb29961046364fb1fba84e9ebd3726ff996bb07b53/arro3_core-0.5.1-cp313-cp313-manylinux_2_24_aarch64.whl", hash = "sha256:a4b93fcc5464bd2b638402b56032a1d3cecb78d668d0aa1035d2ee7ee7487abb", size = 2286389, upload-time = "2025-05-31T23:20:32.66Z" }, + { url = "https://files.pythonhosted.org/packages/49/21/8338d0a2ede9128dc46f44601b584ec3544f9ee2d43c841307d563e8cdfa/arro3_core-0.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8effd284a02b2a685736eb0365528842992a770a3bf544ece4ccc0ed9a7bf703", size = 2721899, upload-time = "2025-05-31T23:20:36.269Z" }, + { url = "https://files.pythonhosted.org/packages/67/96/f90db955ed8b8d422d09b15e3b1f759a02e4700021f2e4ac68dd5cedca51/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cbc512e90647176528ea09ac18a5d27a47a0ac05755b7924ffcb89923dbf6e38", size = 2431834, upload-time = "2025-05-31T23:20:40.269Z" }, + { url = "https://files.pythonhosted.org/packages/88/f3/c58d9769d46b13f6d51ff5998885396ef224eb384a0ebda236ef26a833a7/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d4d0141a6b7f5744750cc4066f564cfd509df6857704a2a9a29946a7c2f08f2b", size = 2866047, upload-time = "2025-05-31T23:20:43.72Z" }, + { url = "https://files.pythonhosted.org/packages/7c/7a/af901793fa426e8b86194654820c3612001b165b25f3bd7adde8d9e7bef4/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8c14b496f93906125baccef75703f0ea1c91608c201296bc21a1e916e5eb42c", size = 2792693, upload-time = "2025-05-31T23:20:47.071Z" }, + { url = "https://files.pythonhosted.org/packages/2e/97/651eb8358d64d2bf5353db3d31ae6cb06529a07d2be699aa6a27434c6811/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:40e9db9564f22286310c5304884468b98d4eeb628f71c22f27d527e4219ae247", size = 2706150, upload-time = "2025-05-31T23:20:51.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/af/0d591453490941e7cd2524ccac0398824eabafa745d0a25a758b1de2e361/arro3_core-0.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:bb0b13975c5394cb6a9887495aaf06cad8993893f99911c8aa2b827cd55dd6a8", size = 2612300, upload-time = "2025-05-31T23:20:54.249Z" }, + { url = "https://files.pythonhosted.org/packages/74/5c/c7135425c172d7fbc94c47ab48d46431d52df5b5f888bc140f7b2b710037/arro3_core-0.5.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f778d41f63cadb1b9e6bce3446e2758db271bc9b81878617232729053c7520fc", size = 2447436, upload-time = "2025-05-31T23:21:45.231Z" }, + { url = "https://files.pythonhosted.org/packages/5e/2c/b7f94e70101abaafa78a36445fdeadfc4461535a0acf55cd9c20bdc7e2b3/arro3_core-0.5.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:4df0b8594518bec2602d1b289dbabf22b9b0b63affc90ff0d6107990208c5e67", size = 2154852, upload-time = "2025-05-31T23:21:48.708Z" }, + { url = "https://files.pythonhosted.org/packages/7d/05/020b1cc1449755d35ba91d814d047fa20d18b9fb577a9fe9b87c72a1a217/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1165f2973c7472e564cef53505cc55852991733f00991b42d011d0f76c4c4c4a", size = 2593644, upload-time = "2025-05-31T23:21:52.812Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/5160d6adaad3a1db443ff5409353ec4df82d2068a8ed9b8e738036325c3c/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:abafcb7f2fe892700e5821b5158c98fad772a2c7412c9b35e4174ed919e24ed4", size = 2635380, upload-time = "2025-05-31T23:21:56.684Z" }, + { url = "https://files.pythonhosted.org/packages/53/21/4aa439cc2b597e0de66aef03f0f509afe206547b0794ce0ba004134fe716/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93120f0de07f2cac507219e74ef25a95a10fc5ec5a2d51c2fd117db2929220df", size = 2867549, upload-time = "2025-05-31T23:22:00.93Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/1338fff3c27366cd9ffc444c96aa74bfea3dc8ebb9dea4ee33346d74bccd/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:657896fc1e50e39d6ad9261f15cca103f26a7213dc30a6300dbcec6c5cc5a72d", size = 2539421, upload-time = "2025-05-31T23:22:04.631Z" }, + { url = "https://files.pythonhosted.org/packages/bc/78/3660ee1f71074a5195ae96c0cc9b58464c588705a5a93cc26b4f23a51cac/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_24_aarch64.whl", hash = "sha256:a8a6df4af193898b6e09902ba76a9c0c8699efaf91b3cff87d5f49cc97e04544", size = 2289147, upload-time = "2025-05-31T23:22:08.53Z" }, + { url = "https://files.pythonhosted.org/packages/85/cb/37d165bdb1633249e2e987d52d00308f790b4d24121b2a0a2a7817e1f8bb/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d3faf92e45b479cd5556370db1c8895f153d9f59c52fdbd85af751838c8b218", size = 2723645, upload-time = "2025-05-31T23:22:12.604Z" }, + { url = "https://files.pythonhosted.org/packages/40/18/3edf9949cc09f9545e06abe8fd2b92eff71e86f8927062a3ab8cb1320377/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:07e358e8ea9c7b8fa38af79d0942b1e3174123541584370e9020394101d4198a", size = 2434306, upload-time = "2025-05-31T23:22:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/2e/98a874f5f3b3baf911d8b87151b6654ac161ccb09ebb2cf621ac4da2edc3/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = 
"sha256:70cfb884cfb465f4c0143a38e172a6de4a904afe884bd6773a89c4c6659c41e7", size = 2868790, upload-time = "2025-05-31T23:22:20.536Z" }, + { url = "https://files.pythonhosted.org/packages/1a/4c/0f7aa37d3374a82fa084517ac353378fc397685422ee1eac8884044cd487/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:9f47326af6c10cec993cee9cbcc4e554dc0c06269e2ba6f83c68235ae13ee98c", size = 2796671, upload-time = "2025-05-31T23:22:24.62Z" }, + { url = "https://files.pythonhosted.org/packages/0d/90/1c0714e2c1af68229e8d49c53a861399654b26152a19306927e48740dbd1/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5832859f53eb82c67bda2a655d466fb8520d096166df4ee9b0b17df748cbacb1", size = 2708649, upload-time = "2025-05-31T23:22:28.719Z" }, +] + [[package]] name = "asttokens" version = "3.0.0" @@ -33,6 +117,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, +] + [[package]] name = "certifi" version = "2025.4.26" @@ -160,6 +253,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, ] +[[package]] +name = "click" +version = "8.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -369,6 +474,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] +[[package]] +name = "deltalake" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "arro3-core" }, + { name 
= "deprecated" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/c3/19cd8457243c41aa60562d28b66271ff958d896e3fd9373816d8fd781f1a/deltalake-1.0.2.tar.gz", hash = "sha256:fbe4cccde0af14c6e30b62cc3dd09e9a46777e8fd8e375ec809a6bf4edea756c", size = 5076074, upload-time = "2025-06-02T11:08:14.063Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/74/043f52f50cbda7f651d39465fb7c5a9e8880e9a332abbb4f64c4d0522306/deltalake-1.0.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e4f24cdbadaf8a4c32ae535a44b89d8bcafd5cb97897de33a4ec8609058a7d50", size = 41649942, upload-time = "2025-06-02T11:08:17.754Z" }, + { url = "https://files.pythonhosted.org/packages/f9/99/ced0f538deacdf0f1e78e28a14c30420d8df1c7d9ca30ff8f71a03a008a7/deltalake-1.0.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:43731c48657c16c1728c90270e5e7ae1f3fa1a5b6fb0cb0b55c88c5c8f23cc3f", size = 38590012, upload-time = "2025-06-02T11:09:07.48Z" }, + { url = "https://files.pythonhosted.org/packages/6f/f1/feee0df833eed13a27aafeedfac313c0b6bf7b0d712fa5892b1099a7a752/deltalake-1.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921b47e4810a346650141dae30abc69564e57f26e00cce256f1837dd9c4b5fd", size = 40281750, upload-time = "2025-06-02T11:08:52.532Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6f/4707d7511bd172f6c6504e87ea0bc43cdf7b5a4c85340ff61cee83170e37/deltalake-1.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59a3b403e5871d12920798d27f2b1e4b70f4e975381841066cb6733ccbc80071", size = 51273870, upload-time = "2025-06-02T11:08:10.194Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2a/1dfc1f337f85d62141b4e70923b923d5faccbe666d4253b670c6d506d1bb/deltalake-1.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:67d3224ce7e569bbb6d5181f9ed2530b237a1cdc87f413e5ff0bc1227aab50d5", size = 40293966, upload-time = "2025-06-02T11:08:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/78/a9/9014b804f947a505c21a6c0cbc87e2673cacb6cd82ac70be9a60f26a836b/deltalake-1.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:7a1606f535416d4a38ce554019f9fcad194aaec33d638328662b2de46af03059", size = 42567914, upload-time = "2025-06-02T11:23:49.313Z" }, +] + +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -431,6 +566,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/ff/995277586691c0cc314c28b24b4ec30610440fd7bf580072aed1409f95b0/fonttools-4.58.1-py3-none-any.whl", hash = "sha256:db88365d0962cd6f5bce54b190a4669aeed9c9941aa7bd60a5af084d8d9173d6", size = 1113429, upload-time = "2025-05-28T15:29:24.185Z" }, ] +[[package]] +name = "fsspec" +version = "2025.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052, upload-time = "2025-05-24T12:03:21.66Z" }, +] + [[package]] name = "httpie" version = "3.2.4" @@ -525,7 +669,8 @@ name = "ipython" version = "9.2.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, @@ -771,6 +916,78 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mmh3" +version = "5.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/1b/1fc6888c74cbd8abad1292dde2ddfcf8fc059e114c97dd6bf16d12f36293/mmh3-5.1.0.tar.gz", hash = "sha256:136e1e670500f177f49ec106a4ebf0adf20d18d96990cc36ea492c651d2b406c", size = 33728, upload-time = "2025-01-25T08:39:43.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/01/9d06468928661765c0fc248a29580c760a4a53a9c6c52cf72528bae3582e/mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec", size = 56095, upload-time = "2025-01-25T08:37:53.621Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/7b39307fc9db867b2a9a20c58b0de33b778dd6c55e116af8ea031f1433ba/mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a", size = 40512, upload-time = "2025-01-25T08:37:54.972Z" }, + { url = "https://files.pythonhosted.org/packages/4f/85/728ca68280d8ccc60c113ad119df70ff1748fbd44c89911fed0501faf0b8/mmh3-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4ba8cac21e1f2d4e436ce03a82a7f87cda80378691f760e9ea55045ec480a3d", size = 40110, upload-time = "2025-01-25T08:37:57.86Z" }, + { url = "https://files.pythonhosted.org/packages/e4/96/beaf0e301472ffa00358bbbf771fe2d9c4d709a2fe30b1d929e569f8cbdf/mmh3-5.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69281c281cb01994f054d862a6bb02a2e7acfe64917795c58934b0872b9ece4", size = 100151, upload-time = "2025-01-25T08:37:59.609Z" }, + { url = "https://files.pythonhosted.org/packages/c3/ee/9381f825c4e09ffafeffa213c3865c4bf7d39771640de33ab16f6faeb854/mmh3-5.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d05ed3962312fbda2a1589b97359d2467f677166952f6bd410d8c916a55febf", size = 106312, upload-time = "2025-01-25T08:38:02.102Z" }, + { url = "https://files.pythonhosted.org/packages/67/dc/350a54bea5cf397d357534198ab8119cfd0d8e8bad623b520f9c290af985/mmh3-5.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78ae6a03f4cff4aa92ddd690611168856f8c33a141bd3e5a1e0a85521dc21ea0", size = 
104232, upload-time = "2025-01-25T08:38:03.852Z" }, + { url = "https://files.pythonhosted.org/packages/b2/5d/2c6eb4a4ec2f7293b98a9c07cb8c64668330b46ff2b6511244339e69a7af/mmh3-5.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95f983535b39795d9fb7336438faae117424c6798f763d67c6624f6caf2c4c01", size = 91663, upload-time = "2025-01-25T08:38:06.24Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ac/17030d24196f73ecbab8b5033591e5e0e2beca103181a843a135c78f4fee/mmh3-5.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d46fdd80d4c7ecadd9faa6181e92ccc6fe91c50991c9af0e371fdf8b8a7a6150", size = 99166, upload-time = "2025-01-25T08:38:07.988Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ed/54ddc56603561a10b33da9b12e95a48a271d126f4a4951841bbd13145ebf/mmh3-5.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0f16e976af7365ea3b5c425124b2a7f0147eed97fdbb36d99857f173c8d8e096", size = 101555, upload-time = "2025-01-25T08:38:09.821Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c3/33fb3a940c9b70908a5cc9fcc26534aff8698180f9f63ab6b7cc74da8bcd/mmh3-5.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6fa97f7d1e1f74ad1565127229d510f3fd65d931fdedd707c1e15100bc9e5ebb", size = 94813, upload-time = "2025-01-25T08:38:11.682Z" }, + { url = "https://files.pythonhosted.org/packages/61/88/c9ff76a23abe34db8eee1a6fa4e449462a16c7eb547546fc5594b0860a72/mmh3-5.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4052fa4a8561bd62648e9eb993c8f3af3bdedadf3d9687aa4770d10e3709a80c", size = 109611, upload-time = "2025-01-25T08:38:12.602Z" }, + { url = "https://files.pythonhosted.org/packages/0b/8e/27d04f40e95554ebe782cac7bddda2d158cf3862387298c9c7b254fa7beb/mmh3-5.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3f0e8ae9f961037f812afe3cce7da57abf734285961fffbeff9a4c011b737732", size = 100515, upload-time = "2025-01-25T08:38:16.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/00/504ca8f462f01048f3c87cd93f2e1f60b93dac2f930cd4ed73532a9337f5/mmh3-5.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:99297f207db967814f1f02135bb7fe7628b9eacb046134a34e1015b26b06edce", size = 100177, upload-time = "2025-01-25T08:38:18.186Z" }, + { url = "https://files.pythonhosted.org/packages/6f/1d/2efc3525fe6fdf8865972fcbb884bd1f4b0f923c19b80891cecf7e239fa5/mmh3-5.1.0-cp310-cp310-win32.whl", hash = "sha256:2e6c8dc3631a5e22007fbdb55e993b2dbce7985c14b25b572dd78403c2e79182", size = 40815, upload-time = "2025-01-25T08:38:19.176Z" }, + { url = "https://files.pythonhosted.org/packages/38/b5/c8fbe707cb0fea77a6d2d58d497bc9b67aff80deb84d20feb34d8fdd8671/mmh3-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:e4e8c7ad5a4dddcfde35fd28ef96744c1ee0f9d9570108aa5f7e77cf9cfdf0bf", size = 41479, upload-time = "2025-01-25T08:38:21.098Z" }, + { url = "https://files.pythonhosted.org/packages/a1/f1/663e16134f913fccfbcea5b300fb7dc1860d8f63dc71867b013eebc10aec/mmh3-5.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:45da549269883208912868a07d0364e1418d8292c4259ca11699ba1b2475bd26", size = 38883, upload-time = "2025-01-25T08:38:22.013Z" }, + { url = "https://files.pythonhosted.org/packages/56/09/fda7af7fe65928262098382e3bf55950cfbf67d30bf9e47731bf862161e9/mmh3-5.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0b529dcda3f951ff363a51d5866bc6d63cf57f1e73e8961f864ae5010647079d", size = 56098, upload-time = "2025-01-25T08:38:22.917Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/ab/84c7bc3f366d6f3bd8b5d9325a10c367685bc17c26dac4c068e2001a4671/mmh3-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db1079b3ace965e562cdfc95847312f9273eb2ad3ebea983435c8423e06acd7", size = 40513, upload-time = "2025-01-25T08:38:25.079Z" }, + { url = "https://files.pythonhosted.org/packages/4f/21/25ea58ca4a652bdc83d1528bec31745cce35802381fb4fe3c097905462d2/mmh3-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22d31e3a0ff89b8eb3b826d6fc8e19532998b2aa6b9143698043a1268da413e1", size = 40112, upload-time = "2025-01-25T08:38:25.947Z" }, + { url = "https://files.pythonhosted.org/packages/bd/78/4f12f16ae074ddda6f06745254fdb50f8cf3c85b0bbf7eaca58bed84bf58/mmh3-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2139bfbd354cd6cb0afed51c4b504f29bcd687a3b1460b7e89498329cc28a894", size = 102632, upload-time = "2025-01-25T08:38:26.939Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/8f09dc999cf2a09b6138d8d7fc734efb7b7bfdd9adb9383380941caadff0/mmh3-5.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c8105c6a435bc2cd6ea2ef59558ab1a2976fd4a4437026f562856d08996673a", size = 108884, upload-time = "2025-01-25T08:38:29.159Z" }, + { url = "https://files.pythonhosted.org/packages/bd/91/e59a66538a3364176f6c3f7620eee0ab195bfe26f89a95cbcc7a1fb04b28/mmh3-5.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57730067174a7f36fcd6ce012fe359bd5510fdaa5fe067bc94ed03e65dafb769", size = 106835, upload-time = "2025-01-25T08:38:33.04Z" }, + { url = "https://files.pythonhosted.org/packages/25/14/b85836e21ab90e5cddb85fe79c494ebd8f81d96a87a664c488cc9277668b/mmh3-5.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bde80eb196d7fdc765a318604ded74a4378f02c5b46c17aa48a27d742edaded2", size = 93688, upload-time = "2025-01-25T08:38:34.987Z" }, + { url = "https://files.pythonhosted.org/packages/ac/aa/8bc964067df9262740c95e4cde2d19f149f2224f426654e14199a9e47df6/mmh3-5.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9c8eddcb441abddeb419c16c56fd74b3e2df9e57f7aa2903221996718435c7a", size = 101569, upload-time = "2025-01-25T08:38:35.983Z" }, + { url = "https://files.pythonhosted.org/packages/70/b6/1fb163cbf919046a64717466c00edabebece3f95c013853fec76dbf2df92/mmh3-5.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:99e07e4acafbccc7a28c076a847fb060ffc1406036bc2005acb1b2af620e53c3", size = 98483, upload-time = "2025-01-25T08:38:38.198Z" }, + { url = "https://files.pythonhosted.org/packages/70/49/ba64c050dd646060f835f1db6b2cd60a6485f3b0ea04976e7a29ace7312e/mmh3-5.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e25ba5b530e9a7d65f41a08d48f4b3fedc1e89c26486361166a5544aa4cad33", size = 96496, upload-time = "2025-01-25T08:38:39.257Z" }, + { url = "https://files.pythonhosted.org/packages/9e/07/f2751d6a0b535bb865e1066e9c6b80852571ef8d61bce7eb44c18720fbfc/mmh3-5.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bb9bf7475b4d99156ce2f0cf277c061a17560c8c10199c910a680869a278ddc7", size = 105109, upload-time = "2025-01-25T08:38:40.395Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/30360a5a66f7abba44596d747cc1e6fb53136b168eaa335f63454ab7bb79/mmh3-5.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2a1b0878dd281ea3003368ab53ff6f568e175f1b39f281df1da319e58a19c23a", size = 98231, upload-time = "2025-01-25T08:38:42.141Z" 
}, + { url = "https://files.pythonhosted.org/packages/8c/60/8526b0c750ff4d7ae1266e68b795f14b97758a1d9fcc19f6ecabf9c55656/mmh3-5.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:25f565093ac8b8aefe0f61f8f95c9a9d11dd69e6a9e9832ff0d293511bc36258", size = 97548, upload-time = "2025-01-25T08:38:43.402Z" }, + { url = "https://files.pythonhosted.org/packages/6d/4c/26e1222aca65769280d5427a1ce5875ef4213449718c8f03958d0bf91070/mmh3-5.1.0-cp311-cp311-win32.whl", hash = "sha256:1e3554d8792387eac73c99c6eaea0b3f884e7130eb67986e11c403e4f9b6d372", size = 40810, upload-time = "2025-01-25T08:38:45.143Z" }, + { url = "https://files.pythonhosted.org/packages/98/d5/424ba95062d1212ea615dc8debc8d57983f2242d5e6b82e458b89a117a1e/mmh3-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8ad777a48197882492af50bf3098085424993ce850bdda406a358b6ab74be759", size = 41476, upload-time = "2025-01-25T08:38:46.029Z" }, + { url = "https://files.pythonhosted.org/packages/bd/08/0315ccaf087ba55bb19a6dd3b1e8acd491e74ce7f5f9c4aaa06a90d66441/mmh3-5.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f29dc4efd99bdd29fe85ed6c81915b17b2ef2cf853abf7213a48ac6fb3eaabe1", size = 38880, upload-time = "2025-01-25T08:38:47.035Z" }, + { url = "https://files.pythonhosted.org/packages/f4/47/e5f452bdf16028bfd2edb4e2e35d0441e4a4740f30e68ccd4cfd2fb2c57e/mmh3-5.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:45712987367cb9235026e3cbf4334670522a97751abfd00b5bc8bfa022c3311d", size = 56152, upload-time = "2025-01-25T08:38:47.902Z" }, + { url = "https://files.pythonhosted.org/packages/60/38/2132d537dc7a7fdd8d2e98df90186c7fcdbd3f14f95502a24ba443c92245/mmh3-5.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b1020735eb35086ab24affbea59bb9082f7f6a0ad517cb89f0fc14f16cea4dae", size = 40564, upload-time = "2025-01-25T08:38:48.839Z" }, + { url = "https://files.pythonhosted.org/packages/c0/2a/c52cf000581bfb8d94794f58865658e7accf2fa2e90789269d4ae9560b16/mmh3-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:babf2a78ce5513d120c358722a2e3aa7762d6071cd10cede026f8b32452be322", size = 40104, upload-time = "2025-01-25T08:38:49.773Z" }, + { url = "https://files.pythonhosted.org/packages/83/33/30d163ce538c54fc98258db5621447e3ab208d133cece5d2577cf913e708/mmh3-5.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4f47f58cd5cbef968c84a7c1ddc192fef0a36b48b0b8a3cb67354531aa33b00", size = 102634, upload-time = "2025-01-25T08:38:51.5Z" }, + { url = "https://files.pythonhosted.org/packages/94/5c/5a18acb6ecc6852be2d215c3d811aa61d7e425ab6596be940877355d7f3e/mmh3-5.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2044a601c113c981f2c1e14fa33adc9b826c9017034fe193e9eb49a6882dbb06", size = 108888, upload-time = "2025-01-25T08:38:52.542Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/11c556324c64a92aa12f28e221a727b6e082e426dc502e81f77056f6fc98/mmh3-5.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c94d999c9f2eb2da44d7c2826d3fbffdbbbbcde8488d353fee7c848ecc42b968", size = 106968, upload-time = "2025-01-25T08:38:54.286Z" }, + { url = "https://files.pythonhosted.org/packages/5d/61/ca0c196a685aba7808a5c00246f17b988a9c4f55c594ee0a02c273e404f3/mmh3-5.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a015dcb24fa0c7a78f88e9419ac74f5001c1ed6a92e70fd1803f74afb26a4c83", size = 93771, upload-time = "2025-01-25T08:38:55.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/55/0927c33528710085ee77b808d85bbbafdb91a1db7c8eaa89cac16d6c513e/mmh3-5.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:457da019c491a2d20e2022c7d4ce723675e4c081d9efc3b4d8b9f28a5ea789bd", size = 101726, upload-time = "2025-01-25T08:38:56.654Z" }, + { url = "https://files.pythonhosted.org/packages/49/39/a92c60329fa470f41c18614a93c6cd88821412a12ee78c71c3f77e1cfc2d/mmh3-5.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:71408579a570193a4ac9c77344d68ddefa440b00468a0b566dcc2ba282a9c559", size = 98523, upload-time = "2025-01-25T08:38:57.662Z" }, + { url = "https://files.pythonhosted.org/packages/81/90/26adb15345af8d9cf433ae1b6adcf12e0a4cad1e692de4fa9f8e8536c5ae/mmh3-5.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8b3a04bc214a6e16c81f02f855e285c6df274a2084787eeafaa45f2fbdef1b63", size = 96628, upload-time = "2025-01-25T08:38:59.505Z" }, + { url = "https://files.pythonhosted.org/packages/8a/4d/340d1e340df972a13fd4ec84c787367f425371720a1044220869c82364e9/mmh3-5.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:832dae26a35514f6d3c1e267fa48e8de3c7b978afdafa0529c808ad72e13ada3", size = 105190, upload-time = "2025-01-25T08:39:00.483Z" }, + { url = "https://files.pythonhosted.org/packages/d3/7c/65047d1cccd3782d809936db446430fc7758bda9def5b0979887e08302a2/mmh3-5.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bf658a61fc92ef8a48945ebb1076ef4ad74269e353fffcb642dfa0890b13673b", size = 98439, upload-time = "2025-01-25T08:39:01.484Z" }, + { url = "https://files.pythonhosted.org/packages/72/d2/3c259d43097c30f062050f7e861075099404e8886b5d4dd3cebf180d6e02/mmh3-5.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3313577453582b03383731b66447cdcdd28a68f78df28f10d275d7d19010c1df", size = 97780, upload-time = "2025-01-25T08:39:02.444Z" }, + { url = "https://files.pythonhosted.org/packages/29/29/831ea8d4abe96cdb3e28b79eab49cac7f04f9c6b6e36bfc686197ddba09d/mmh3-5.1.0-cp312-cp312-win32.whl", hash = "sha256:1d6508504c531ab86c4424b5a5ff07c1132d063863339cf92f6657ff7a580f76", size = 40835, upload-time = "2025-01-25T08:39:03.369Z" }, + { url = "https://files.pythonhosted.org/packages/12/dd/7cbc30153b73f08eeac43804c1dbc770538a01979b4094edbe1a4b8eb551/mmh3-5.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:aa75981fcdf3f21759d94f2c81b6a6e04a49dfbcdad88b152ba49b8e20544776", size = 41509, upload-time = "2025-01-25T08:39:04.284Z" }, + { url = "https://files.pythonhosted.org/packages/80/9d/627375bab4c90dd066093fc2c9a26b86f87e26d980dbf71667b44cbee3eb/mmh3-5.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:a4c1a76808dfea47f7407a0b07aaff9087447ef6280716fd0783409b3088bb3c", size = 38888, upload-time = "2025-01-25T08:39:05.174Z" }, + { url = "https://files.pythonhosted.org/packages/05/06/a098a42870db16c0a54a82c56a5bdc873de3165218cd5b3ca59dbc0d31a7/mmh3-5.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a523899ca29cfb8a5239618474a435f3d892b22004b91779fcb83504c0d5b8c", size = 56165, upload-time = "2025-01-25T08:39:06.887Z" }, + { url = "https://files.pythonhosted.org/packages/5a/65/eaada79a67fde1f43e1156d9630e2fb70655e1d3f4e8f33d7ffa31eeacfd/mmh3-5.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:17cef2c3a6ca2391ca7171a35ed574b5dab8398163129a3e3a4c05ab85a4ff40", size = 40569, upload-time = "2025-01-25T08:39:07.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/7e/2b6c43ed48be583acd68e34d16f19209a9f210e4669421b0321e326d8554/mmh3-5.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:52e12895b30110f3d89dae59a888683cc886ed0472dd2eca77497edef6161997", size = 40104, upload-time = "2025-01-25T08:39:09.598Z" }, + { url = "https://files.pythonhosted.org/packages/11/2b/1f9e962fdde8e41b0f43d22c8ba719588de8952f9376df7d73a434827590/mmh3-5.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d6719045cda75c3f40397fc24ab67b18e0cb8f69d3429ab4c39763c4c608dd", size = 102497, upload-time = "2025-01-25T08:39:10.512Z" }, + { url = "https://files.pythonhosted.org/packages/46/94/d6c5c3465387ba077cccdc028ab3eec0d86eed1eebe60dcf4d15294056be/mmh3-5.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d19fa07d303a91f8858982c37e6939834cb11893cb3ff20e6ee6fa2a7563826a", size = 108834, upload-time = "2025-01-25T08:39:11.568Z" }, + { url = "https://files.pythonhosted.org/packages/34/1e/92c212bb81796b69dddfd50a8a8f4b26ab0d38fdaf1d3e8628a67850543b/mmh3-5.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:31b47a620d622fbde8ca1ca0435c5d25de0ac57ab507209245e918128e38e676", size = 106936, upload-time = "2025-01-25T08:39:12.638Z" }, + { url = "https://files.pythonhosted.org/packages/f4/41/f2f494bbff3aad5ffd2085506255049de76cde51ddac84058e32768acc79/mmh3-5.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00f810647c22c179b6821079f7aa306d51953ac893587ee09cf1afb35adf87cb", size = 93709, upload-time = "2025-01-25T08:39:14.071Z" }, + { url = "https://files.pythonhosted.org/packages/9e/a9/a2cc4a756d73d9edf4fb85c76e16fd56b0300f8120fd760c76b28f457730/mmh3-5.1.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6128b610b577eed1e89ac7177ab0c33d06ade2aba93f5c89306032306b5f1c6", size = 101623, upload-time = "2025-01-25T08:39:15.507Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6f/b9d735533b6a56b2d56333ff89be6a55ac08ba7ff33465feb131992e33eb/mmh3-5.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1e550a45d2ff87a1c11b42015107f1778c93f4c6f8e731bf1b8fa770321b8cc4", size = 98521, upload-time = "2025-01-25T08:39:16.77Z" }, + { url = "https://files.pythonhosted.org/packages/99/47/dff2b54fac0d421c1e6ecbd2d9c85b2d0e6f6ee0d10b115d9364116a511e/mmh3-5.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:785ae09276342f79fd8092633e2d52c0f7c44d56e8cfda8274ccc9b76612dba2", size = 96696, upload-time = "2025-01-25T08:39:17.805Z" }, + { url = "https://files.pythonhosted.org/packages/be/43/9e205310f47c43ddf1575bb3a1769c36688f30f1ac105e0f0c878a29d2cd/mmh3-5.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0f4be3703a867ef976434afd3661a33884abe73ceb4ee436cac49d3b4c2aaa7b", size = 105234, upload-time = "2025-01-25T08:39:18.908Z" }, + { url = "https://files.pythonhosted.org/packages/6b/44/90b11fd2b67dcb513f5bfe9b476eb6ca2d5a221c79b49884dc859100905e/mmh3-5.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e513983830c4ff1f205ab97152a0050cf7164f1b4783d702256d39c637b9d107", size = 98449, upload-time = "2025-01-25T08:39:20.719Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/25c4b0c7b8e49836541059b28e034a4cccd0936202800d43a1cc48495ecb/mmh3-5.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9135c300535c828c0bae311b659f33a31c941572eae278568d1a953c4a57b59", size = 97796, upload-time = 
"2025-01-25T08:39:22.453Z" }, + { url = "https://files.pythonhosted.org/packages/23/fa/cbbb7fcd0e287a715f1cd28a10de94c0535bd94164e38b852abc18da28c6/mmh3-5.1.0-cp313-cp313-win32.whl", hash = "sha256:c65dbd12885a5598b70140d24de5839551af5a99b29f9804bb2484b29ef07692", size = 40828, upload-time = "2025-01-25T08:39:23.372Z" }, + { url = "https://files.pythonhosted.org/packages/09/33/9fb90ef822f7b734955a63851907cf72f8a3f9d8eb3c5706bfa6772a2a77/mmh3-5.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:10db7765201fc65003fa998faa067417ef6283eb5f9bba8f323c48fd9c33e91f", size = 41504, upload-time = "2025-01-25T08:39:24.286Z" }, + { url = "https://files.pythonhosted.org/packages/16/71/4ad9a42f2772793a03cb698f0fc42499f04e6e8d2560ba2f7da0fb059a8e/mmh3-5.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:b22fe2e54be81f6c07dcb36b96fa250fb72effe08aa52fbb83eade6e1e2d5fd7", size = 38890, upload-time = "2025-01-25T08:39:25.28Z" }, +] + [[package]] name = "multidict" version = "6.4.4" @@ -894,7 +1111,8 @@ name = "networkx" version = "3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", ] sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ @@ -970,6 +1188,10 @@ dependencies = [ { name = "matplotlib" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pandas" }, + { name = "polars" }, + { name = "pyarrow" }, + { name = "pyyaml" }, { name = "typing-extensions" }, { name = "xxhash" }, ] @@ -981,18 +1203,25 @@ redis = [ [package.dev-dependencies] dev = [ + { name = "deltalake" }, { name = "httpie" }, { name = "ipykernel" }, + { name = "pyiceberg" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "redis" }, { name = "ruff" }, + { name = "tqdm" }, ] [package.metadata] requires-dist = [ { name = "matplotlib", specifier = ">=3.10.3" }, { name = "networkx" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "polars", specifier = ">=1.30.0" }, + { name = "pyarrow", specifier = ">=20.0.0" }, + { name = "pyyaml", specifier = ">=6.0.2" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=6.2.0" }, { name = "typing-extensions" }, { name = "xxhash" }, @@ -1001,12 +1230,15 @@ provides-extras = ["redis"] [package.metadata.requires-dev] dev = [ + { name = "deltalake", specifier = ">=1.0.2" }, { name = "httpie", specifier = ">=3.2.4" }, { name = "ipykernel", specifier = ">=6.29.5" }, + { name = "pyiceberg", specifier = ">=0.9.1" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "pytest-cov", specifier = ">=6.1.1" }, { name = "redis", specifier = ">=6.2.0" }, { name = "ruff", specifier = ">=0.11.11" }, + { name = "tqdm", specifier = ">=4.67.1" }, ] [[package]] @@ -1018,6 +1250,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandas" 
+version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827, upload-time = "2024-09-20T13:08:42.347Z" }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897, upload-time = "2024-09-20T13:08:45.807Z" }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908, upload-time = "2024-09-20T18:37:13.513Z" }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210, upload-time = "2024-09-20T13:08:48.325Z" }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292, upload-time = "2024-09-20T19:01:54.443Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379, upload-time = "2024-09-20T13:08:50.882Z" }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471, upload-time = "2024-09-20T13:08:53.332Z" }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload-time = "2024-09-20T13:08:56.254Z" }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload-time = "2024-09-20T13:08:58.645Z" }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload-time = "2024-09-20T19:01:57.571Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload-time = "2024-09-20T13:09:01.501Z" }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload-time = "2024-09-20T19:02:00.678Z" }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload-time = "2024-09-20T13:09:04.105Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload-time = "2024-09-20T13:09:06.917Z" }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, +] + [[package]] name = "parso" version = "0.8.4" @@ -1143,6 +1423,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/b6/8dbdf626c0705a57f052708c9fc0860ffc2aa97955930d5faaf6a66fcfd3/polars-1.30.0.tar.gz", hash = "sha256:dfe94ae84a5efd9ba74e616e3e125b24ca155494a931890a8f17480737c4db45", size = 4668318, upload-time = "2025-05-21T13:33:24.175Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/48/e9b2cb379abcc9f7aff2e701098fcdb9fe6d85dc4ad4cec7b35d39c70951/polars-1.30.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4c33bc97c29b7112f0e689a2f8a33143973a3ff466c70b25c7fd1880225de6dd", size = 35704342, upload-time = "2025-05-21T13:32:22.996Z" }, + { url = "https://files.pythonhosted.org/packages/36/ca/f545f61282f75eea4dfde4db2944963dcd59abd50c20e33a1c894da44dad/polars-1.30.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e3d05914c364b8e39a5b10dcf97e84d76e516b3b1693880bf189a93aab3ca00d", size = 32459857, upload-time = "2025-05-21T13:32:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/76/20/e018cd87d7cb6f8684355f31f4e193222455a6e8f7b942f4a2934f5969c7/polars-1.30.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a52af3862082b868c1febeae650af8ae8a2105d2cb28f0449179a7b44f54ccf", size = 36267243, upload-time = "2025-05-21T13:32:31.796Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/b88b973021be07b13d91b9301cc14392c994225ef5107a32a8ffd3fd6424/polars-1.30.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ffb3ef133454275d4254442257c5f71dd6e393ce365c97997dadeb6fa9d6d4b5", size = 33416871, upload-time = "2025-05-21T13:32:35.077Z" }, + { url = "https://files.pythonhosted.org/packages/dd/7c/d46d4381adeac537b8520b653dc30cb8b7edbf59883d71fbb989e9005de1/polars-1.30.0-cp39-abi3-win_amd64.whl", hash = "sha256:c26b633a9bd530c5fc09d317fca3bb3e16c772bd7df7549a9d8ec1934773cc5d", size = 36363630, upload-time = "2025-05-21T13:32:38.286Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b5/5056d0c12aadb57390d0627492bef8b1abf3549474abb9ae0fd4e2bfa885/polars-1.30.0-cp39-abi3-win_arm64.whl", hash = "sha256:476f1bde65bc7b4d9f80af370645c2981b5798d67c151055e58534e89e96f2a8", size = 32643590, upload-time = "2025-05-21T13:32:42.107Z" }, +] + [[package]] name = "prompt-toolkit" version = "3.0.51" @@ -1188,6 +1482,59 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, ] +[[package]] +name = "pyarrow" +version = "20.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload-time = "2025-04-27T12:34:23.264Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/23/77094eb8ee0dbe88441689cb6afc40ac312a1e15d3a7acc0586999518222/pyarrow-20.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7", size = 30832591, upload-time = "2025-04-27T12:27:27.89Z" }, + { url = "https://files.pythonhosted.org/packages/c3/d5/48cc573aff00d62913701d9fac478518f693b30c25f2c157550b0b2565cb/pyarrow-20.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4", size = 32273686, upload-time = "2025-04-27T12:27:36.816Z" }, + { url = "https://files.pythonhosted.org/packages/37/df/4099b69a432b5cb412dd18adc2629975544d656df3d7fda6d73c5dba935d/pyarrow-20.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6415a0d0174487456ddc9beaead703d0ded5966129fa4fd3114d76b5d1c5ceae", size = 41337051, upload-time = "2025-04-27T12:27:44.4Z" }, + { url = "https://files.pythonhosted.org/packages/4c/27/99922a9ac1c9226f346e3a1e15e63dee6f623ed757ff2893f9d6994a69d3/pyarrow-20.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15aa1b3b2587e74328a730457068dc6c89e6dcbf438d4369f572af9d320a25ee", size = 42404659, upload-time = "2025-04-27T12:27:51.715Z" }, + { url = "https://files.pythonhosted.org/packages/21/d1/71d91b2791b829c9e98f1e0d85be66ed93aff399f80abb99678511847eaa/pyarrow-20.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5605919fbe67a7948c1f03b9f3727d82846c053cd2ce9303ace791855923fd20", size = 40695446, upload-time = "2025-04-27T12:27:59.643Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/ae10fba419a6e94329707487835ec721f5a95f3ac9168500bcf7aa3813c7/pyarrow-20.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a5704f29a74b81673d266e5ec1fe376f060627c2e42c5c7651288ed4b0db29e9", size = 42278528, upload-time = "2025-04-27T12:28:07.297Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a6/aba40a2bf01b5d00cf9cd16d427a5da1fad0fb69b514ce8c8292ab80e968/pyarrow-20.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:00138f79ee1b5aca81e2bdedb91e3739b987245e11fa3c826f9e57c5d102fb75", size = 42918162, upload-time = "2025-04-27T12:28:15.716Z" }, + { url = "https://files.pythonhosted.org/packages/93/6b/98b39650cd64f32bf2ec6d627a9bd24fcb3e4e6ea1873c5e1ea8a83b1a18/pyarrow-20.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f2d67ac28f57a362f1a2c1e6fa98bfe2f03230f7e15927aecd067433b1e70ce8", size = 44550319, upload-time = "2025-04-27T12:28:27.026Z" }, + { url = "https://files.pythonhosted.org/packages/ab/32/340238be1eb5037e7b5de7e640ee22334417239bc347eadefaf8c373936d/pyarrow-20.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:4a8b029a07956b8d7bd742ffca25374dd3f634b35e46cc7a7c3fa4c75b297191", size = 25770759, upload-time = "2025-04-27T12:28:33.702Z" }, 
+ { url = "https://files.pythonhosted.org/packages/47/a2/b7930824181ceadd0c63c1042d01fa4ef63eee233934826a7a2a9af6e463/pyarrow-20.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:24ca380585444cb2a31324c546a9a56abbe87e26069189e14bdba19c86c049f0", size = 30856035, upload-time = "2025-04-27T12:28:40.78Z" }, + { url = "https://files.pythonhosted.org/packages/9b/18/c765770227d7f5bdfa8a69f64b49194352325c66a5c3bb5e332dfd5867d9/pyarrow-20.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:95b330059ddfdc591a3225f2d272123be26c8fa76e8c9ee1a77aad507361cfdb", size = 32309552, upload-time = "2025-04-27T12:28:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/44/fb/dfb2dfdd3e488bb14f822d7335653092dde150cffc2da97de6e7500681f9/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f0fb1041267e9968c6d0d2ce3ff92e3928b243e2b6d11eeb84d9ac547308232", size = 41334704, upload-time = "2025-04-27T12:28:55.064Z" }, + { url = "https://files.pythonhosted.org/packages/58/0d/08a95878d38808051a953e887332d4a76bc06c6ee04351918ee1155407eb/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ff87cc837601532cc8242d2f7e09b4e02404de1b797aee747dd4ba4bd6313f", size = 42399836, upload-time = "2025-04-27T12:29:02.13Z" }, + { url = "https://files.pythonhosted.org/packages/f3/cd/efa271234dfe38f0271561086eedcad7bc0f2ddd1efba423916ff0883684/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7a3a5dcf54286e6141d5114522cf31dd67a9e7c9133d150799f30ee302a7a1ab", size = 40711789, upload-time = "2025-04-27T12:29:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/46/1f/7f02009bc7fc8955c391defee5348f510e589a020e4b40ca05edcb847854/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a6ad3e7758ecf559900261a4df985662df54fb7fdb55e8e3b3aa99b23d526b62", size = 42301124, upload-time = "2025-04-27T12:29:17.187Z" }, + { url = "https://files.pythonhosted.org/packages/4f/92/692c562be4504c262089e86757a9048739fe1acb4024f92d39615e7bab3f/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6bb830757103a6cb300a04610e08d9636f0cd223d32f388418ea893a3e655f1c", size = 42916060, upload-time = "2025-04-27T12:29:24.253Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ec/9f5c7e7c828d8e0a3c7ef50ee62eca38a7de2fa6eb1b8fa43685c9414fef/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:96e37f0766ecb4514a899d9a3554fadda770fb57ddf42b63d80f14bc20aa7db3", size = 44547640, upload-time = "2025-04-27T12:29:32.782Z" }, + { url = "https://files.pythonhosted.org/packages/54/96/46613131b4727f10fd2ffa6d0d6f02efcc09a0e7374eff3b5771548aa95b/pyarrow-20.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3346babb516f4b6fd790da99b98bed9708e3f02e734c84971faccb20736848dc", size = 25781491, upload-time = "2025-04-27T12:29:38.464Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload-time = "2025-04-27T12:29:44.384Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload-time = "2025-04-27T12:29:52.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload-time = "2025-04-27T12:29:59.452Z" }, + { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload-time = "2025-04-27T12:30:06.875Z" }, + { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload-time = "2025-04-27T12:30:13.954Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload-time = "2025-04-27T12:30:21.949Z" }, + { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload-time = "2025-04-27T12:30:29.551Z" }, + { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload-time = "2025-04-27T12:30:36.977Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload-time = "2025-04-27T12:30:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload-time = "2025-04-27T12:30:48.351Z" }, + { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload-time = "2025-04-27T12:30:55.238Z" }, + { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload-time = "2025-04-27T12:31:05.587Z" }, + { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload-time = "2025-04-27T12:31:15.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload-time = "2025-04-27T12:31:24.631Z" }, + { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload-time = "2025-04-27T12:31:31.311Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload-time = "2025-04-27T12:31:39.406Z" }, + { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload-time = "2025-04-27T12:31:45.997Z" }, + { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload-time = "2025-04-27T12:31:54.11Z" }, + { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload-time = "2025-04-27T12:31:59.215Z" }, + { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload-time = "2025-04-27T12:32:05.369Z" }, + { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload-time = "2025-04-27T12:32:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload-time = "2025-04-27T12:32:20.766Z" }, + { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload-time = "2025-04-27T12:32:28.1Z" }, + { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload-time = "2025-04-27T12:32:35.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload-time = "2025-04-27T12:32:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload-time = "2025-04-27T12:32:56.503Z" }, + { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload-time = "2025-04-27T12:33:04.72Z" }, +] + [[package]] name = "pycparser" version = "2.22" @@ -1197,6 +1544,108 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, ] +[[package]] +name = "pydantic" +version = "2.11.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/86/8ce9040065e8f924d642c58e4a344e33163a07f6b57f836d0d734e0ad3fb/pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a", size = 787102, upload-time = "2025-05-22T21:18:08.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/69/831ed22b38ff9b4b64b66569f0e5b7b97cf3638346eb95a2147fdb49ad5f/pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7", size = 444229, upload-time = "2025-05-22T21:18:06.329Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817, upload-time = "2025-04-23T18:30:43.919Z" }, + { url = "https://files.pythonhosted.org/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357, upload-time = "2025-04-23T18:30:46.372Z" }, + { url = "https://files.pythonhosted.org/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", 
size = 1898011, upload-time = "2025-04-23T18:30:47.591Z" }, + { url = "https://files.pythonhosted.org/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730, upload-time = "2025-04-23T18:30:49.328Z" }, + { url = "https://files.pythonhosted.org/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178, upload-time = "2025-04-23T18:30:50.907Z" }, + { url = "https://files.pythonhosted.org/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462, upload-time = "2025-04-23T18:30:52.083Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652, upload-time = "2025-04-23T18:30:53.389Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306, upload-time = "2025-04-23T18:30:54.661Z" }, + { url = "https://files.pythonhosted.org/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720, upload-time = "2025-04-23T18:30:56.11Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915, upload-time = "2025-04-23T18:30:57.501Z" }, + { url = "https://files.pythonhosted.org/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884, upload-time = "2025-04-23T18:30:58.867Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496, upload-time = "2025-04-23T18:31:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019, upload-time = "2025-04-23T18:31:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, 
upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, 
upload-time = "2025-04-23T18:32:00.78Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 
1981560, upload-time = "2025-04-23T18:32:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" }, + { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" }, + { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527, upload-time = "2025-04-23T18:32:59.771Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225, upload-time = "2025-04-23T18:33:04.51Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490, upload-time = "2025-04-23T18:33:06.391Z" }, + { url = "https://files.pythonhosted.org/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525, upload-time = "2025-04-23T18:33:08.44Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446, upload-time = "2025-04-23T18:33:10.313Z" }, + { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" }, + { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -1206,6 +1655,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] +[[package]] +name = "pyiceberg" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "fsspec" }, + { name = "mmh3" }, + { name = "pydantic" }, + { name = "pyparsing" }, + { name = "requests" }, + { name = "rich" }, + { name = "sortedcontainers" }, + { name = "strictyaml" }, + 
{ name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/6a/6c1ac381ff0b8e03a9abc2f05722f6002d7452a2c05118697b3f3910e171/pyiceberg-0.9.1.tar.gz", hash = "sha256:3634134ce33859a441768b39df179b2c6f3de2bbbf506622884f553b013ee799", size = 617629, upload-time = "2025-04-30T14:59:34.306Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/5d/bb10c86b85895d4ba471b8a0e187031d4aaa82592a639242b83dd9354861/pyiceberg-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a183d9217eb82159c01b23c683057f96c8b2375f592b921721d1c157895e2df", size = 527097, upload-time = "2025-04-30T14:58:52.39Z" }, + { url = "https://files.pythonhosted.org/packages/ec/b9/1d6f0d334bc51cd64a58b7320d521e54af3810a6bd748fe2e89db1ad8d5f/pyiceberg-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:57030bb15c397b0379242907c5611f5b4338fb799e972353fd0edafde6cfd2ef", size = 523267, upload-time = "2025-04-30T14:58:53.978Z" }, + { url = "https://files.pythonhosted.org/packages/02/f5/bd43a9c1d2cd3aeb987cbf2b7f25e2b10306fa81522ea00df250fb23cc84/pyiceberg-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ba4cd9a8f6a04cfbc68e0c83f2db3ffd14244da8601a142cc05965d4b343645", size = 838616, upload-time = "2025-04-30T14:58:55.252Z" }, + { url = "https://files.pythonhosted.org/packages/d0/01/c68f9e03413dc983ddadc2c471038af2ff792449fc451731f58a958a7696/pyiceberg-0.9.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d5a48c6a2016d0dcde8c9079cc5e6b6d2e2ac663eddfe4697e7ea03a0edc40b7", size = 838290, upload-time = "2025-04-30T14:58:56.412Z" }, + { url = "https://files.pythonhosted.org/packages/ab/80/b7cba54a33b8b7be3655ff656d6bb8594fec0316eec5cafa231ec7f6ff74/pyiceberg-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:8bebfa5a804a95a9f3d98d88cbeb37430b09add04592238bba2a2b2e0466d60d", size = 523612, upload-time = "2025-04-30T14:58:59.507Z" }, + { url = "https://files.pythonhosted.org/packages/f6/75/c8b4ebba7d345b5e736ebf4976121b97dd7091dcad401a17ca57152704c5/pyiceberg-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e75c502dd56ac3d77036ce8a3b2566348da5ff4367c7c671981616ef6dcc883", size = 566274, upload-time = "2025-04-30T14:59:00.626Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a0/9494c7930e5e4dc951d95abba584d8ffdb7403368398796ede21ff25c26f/pyiceberg-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0a8189c9b3ba81dd12493d6bb874a656a4d4909904552b97a629d1d43b3a0e90", size = 560157, upload-time = "2025-04-30T14:59:02.082Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/351776b1ae83de187d7cf37b100f4f124c7a71e35337182d3aef308156d1/pyiceberg-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c03065d5c5b704444ab8fb18cdd232ec43994db95b9e53444008ebc2cf9dc2c", size = 1052290, upload-time = "2025-04-30T14:59:03.232Z" }, + { url = "https://files.pythonhosted.org/packages/40/17/d8fea681afb52f20bf6a640f9044dcf621a47165f67cc5320bf3c6e82e4e/pyiceberg-0.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:93f2586a5da737de6e4643bf096a01772f068d1eedb7ffde6b36c60b6b9e6bd3", size = 1047503, upload-time = "2025-04-30T14:59:04.38Z" }, + { url = "https://files.pythonhosted.org/packages/d0/e0/d173fc2aa8dc252d7aac71703ba2c0491e4988b3a160cf5abb531cfb9086/pyiceberg-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:94e45c10051110ba7a43b85a1f0a680b4a31d1d6cee593c8e62e14d22d18c47d", size = 559491, upload-time = "2025-04-30T14:59:05.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/26/77983c2884b4a5f13f8a35e5c5e762ae699f6c511efd16730ab883000c1b/pyiceberg-0.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b8a958e3bbe919026533cee1f0fb6b7040928fce8d42c2ecea228de7c17578fa", size = 605755, upload-time = "2025-04-30T14:59:07.087Z" }, + { url = "https://files.pythonhosted.org/packages/6d/67/e6ea7fcc43aebc85aea5a67a69d01c9015283478061c3121b6b8aa158ce4/pyiceberg-0.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7e956b35c6822600c45fd8f3ea8cfea328cc406fefa534afeb6fdb325d05406", size = 597325, upload-time = "2025-04-30T14:59:08.644Z" }, + { url = "https://files.pythonhosted.org/packages/7f/cf/178a9f63fac1bfdd13bc85169e7ab903955d082e2cd80507b1921a6f64dc/pyiceberg-0.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e4e585164d7d86f5c9a609a1bc2abeae2f0ea0680a11a2064d3a945866b5311", size = 1277399, upload-time = "2025-04-30T14:59:10.193Z" }, + { url = "https://files.pythonhosted.org/packages/d1/6b/78d1739eb1d5b18529ee438aed75dac3e0b246f5e4d800931f9d1e37cda2/pyiceberg-0.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fee08dac30e8524526f7d18468f9670f8606905b850b261314c597c6633f3b4", size = 1269083, upload-time = "2025-04-30T14:59:11.964Z" }, + { url = "https://files.pythonhosted.org/packages/67/69/c0087d19c8d8e8530acee3ba485d54aedeebf2963784a16692ca4b439566/pyiceberg-0.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:124793c54a0c2fb5ac4ab19c38da116c068e277c85cbaa7e4064e635a70b595e", size = 595512, upload-time = "2025-04-30T14:59:14.464Z" }, + { url = "https://files.pythonhosted.org/packages/aa/62/0153ed3a39d6f4b3235d430123703d4684eec7ba780404bbc118ace7406a/pyiceberg-0.9.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:77aec1c77d675603e0c5358e74adcae8d13b323753d702011be3f309d26af355", size = 668261, upload-time = "2025-04-30T14:59:21.751Z" }, + { url = "https://files.pythonhosted.org/packages/24/bd/c4cec142686dd8124032c69b6b02ba3703abc114ce787d0f02088b1f43d8/pyiceberg-0.9.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cf567438bf6267bbb67fdfdfc72ac500d523725fca9a6a38f93e8acd4146190e", size = 657439, upload-time = "2025-04-30T14:59:23.304Z" }, + { url = "https://files.pythonhosted.org/packages/ae/74/bbfc70bb1857f9d55d06fee1330a0236876b8ae4aa6fc5d815e2c4fef4f7/pyiceberg-0.9.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5992db7c00d789a33ff117700d453126803e769507a5edeb79bb6510ff72fc00", size = 1352983, upload-time = "2025-04-30T14:59:25.023Z" }, + { url = "https://files.pythonhosted.org/packages/90/20/e33e1716d1368b2471b80d9f1e338110f1e781b34ebffc5e320523102ffc/pyiceberg-0.9.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e460fca26162a3822c0e8d50b49c80928a0e35cb41698748d7a26f8c016215", size = 657563, upload-time = "2025-04-30T14:59:27.004Z" }, +] + [[package]] name = "pyparsing" version = "3.2.3" @@ -1266,6 +1755,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = 
"sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + [[package]] name = "pywin32" version = "310" @@ -1285,6 +1783,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload-time = "2025-03-17T00:56:07.819Z" }, ] +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199, upload-time = "2024-08-06T20:31:40.178Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758, upload-time = "2024-08-06T20:31:42.173Z" }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463, upload-time = "2024-08-06T20:31:44.263Z" }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280, upload-time = "2024-08-06T20:31:50.199Z" }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239, upload-time = "2024-08-06T20:31:52.292Z" }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802, upload-time = "2024-08-06T20:31:53.836Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527, upload-time = "2024-08-06T20:31:55.565Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052, upload-time = "2024-08-06T20:31:56.914Z" }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774, upload-time = "2024-08-06T20:31:58.304Z" }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload-time = "2024-08-06T20:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload-time = "2024-08-06T20:32:04.926Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload-time = "2024-08-06T20:32:06.459Z" }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload-time = "2024-08-06T20:32:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload-time = "2024-08-06T20:32:14.124Z" }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload-time = "2024-08-06T20:32:16.17Z" }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload-time = "2024-08-06T20:32:18.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload-time = "2024-08-06T20:32:19.889Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, +] + [[package]] name = "pyzmq" version = "26.4.0" @@ -1404,16 +1946,16 @@ wheels = [ [[package]] name = "rich" -version = "14.0.0" +version = "13.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload-time = "2024-11-01T16:43:55.817Z" }, ] [[package]] @@ -1459,6 +2001,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -1473,6 +2024,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] +[[package]] +name = "strictyaml" +version = "1.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/08/efd28d49162ce89c2ad61a88bd80e11fb77bc9f6c145402589112d38f8af/strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407", size = 115206, upload-time = "2023-03-10T12:50:27.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7c/a81ef5ef10978dd073a854e0fa93b5d8021d0594b639cc8f6453c3c78a1d/strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7", size = 123917, upload-time = "2023-03-10T12:50:17.242Z" }, +] + +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + [[package]] name = "tomli" version = "2.2.1" @@ -1531,6 +2103,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/a7/535c44c7bea4578e48281d83c615219f3ab19e6abc67625ef637c73987be/tornado-6.5.1-cp39-abi3-win_arm64.whl", hash = "sha256:02420a0eb7bf617257b9935e2b754d1b63897525d8a289c9d65690d580b4dcf7", size = 443596, upload-time = 
"2025-05-22T18:15:37.433Z" }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, +] + [[package]] name = "traitlets" version = "5.14.3" @@ -1549,6 +2133,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, +] + [[package]] name = "urllib3" version = "2.4.0" @@ -1567,6 +2172,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307, upload-time = "2025-01-14T10:33:13.616Z" }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486, upload-time = "2025-01-14T10:33:15.947Z" }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777, upload-time = "2025-01-14T10:33:17.462Z" }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314, upload-time = "2025-01-14T10:33:21.282Z" }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947, upload-time = "2025-01-14T10:33:24.414Z" }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778, upload-time = "2025-01-14T10:33:26.152Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716, upload-time = "2025-01-14T10:33:27.372Z" }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548, upload-time = "2025-01-14T10:33:28.52Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334, upload-time = "2025-01-14T10:33:29.643Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427, upload-time = "2025-01-14T10:33:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774, upload-time = "2025-01-14T10:33:32.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308, upload-time = "2025-01-14T10:33:33.992Z" }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488, upload-time = "2025-01-14T10:33:35.264Z" }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776, upload-time = "2025-01-14T10:33:38.28Z" }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776, upload-time = "2025-01-14T10:33:40.678Z" }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420, upload-time = "2025-01-14T10:33:41.868Z" }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199, upload-time = "2025-01-14T10:33:43.598Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307, upload-time = "2025-01-14T10:33:48.499Z" }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025, upload-time = "2025-01-14T10:33:51.191Z" }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879, upload-time = "2025-01-14T10:33:52.328Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419, upload-time = "2025-01-14T10:33:53.551Z" }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773, upload-time = "2025-01-14T10:33:56.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721, upload-time = "2025-01-14T10:34:07.163Z" }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899, upload-time = "2025-01-14T10:34:09.82Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222, upload-time = "2025-01-14T10:34:11.258Z" }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707, upload-time = "2025-01-14T10:34:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685, upload-time = "2025-01-14T10:34:15.043Z" }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567, upload-time = "2025-01-14T10:34:16.563Z" }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672, upload-time = "2025-01-14T10:34:17.727Z" }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865, upload-time = "2025-01-14T10:34:19.577Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800, upload-time = "2025-01-14T10:34:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824, upload-time = "2025-01-14T10:34:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920, upload-time = "2025-01-14T10:34:25.386Z" }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690, upload-time = "2025-01-14T10:34:28.058Z" }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861, upload-time = "2025-01-14T10:34:29.167Z" }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174, upload-time = "2025-01-14T10:34:31.702Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721, upload-time = "2025-01-14T10:34:32.91Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763, upload-time = "2025-01-14T10:34:34.903Z" }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585, upload-time = "2025-01-14T10:34:36.13Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676, upload-time = "2025-01-14T10:34:37.962Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871, upload-time = "2025-01-14T10:34:39.13Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312, upload-time = "2025-01-14T10:34:40.604Z" }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062, upload-time = "2025-01-14T10:34:45.011Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155, upload-time = "2025-01-14T10:34:47.25Z" }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471, upload-time = "2025-01-14T10:34:50.934Z" }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208, upload-time = "2025-01-14T10:34:52.297Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339, upload-time = "2025-01-14T10:34:53.489Z" }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232, upload-time = "2025-01-14T10:34:55.327Z" }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476, upload-time = "2025-01-14T10:34:58.055Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377, upload-time = "2025-01-14T10:34:59.3Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986, upload-time = "2025-01-14T10:35:00.498Z" }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750, upload-time = "2025-01-14T10:35:03.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, +] + [[package]] name = "xxhash" version = "3.5.0"