diff --git a/.gitignore b/.gitignore index e66a8b6..251b5d3 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,10 @@ +# Ignore data files in notebooks folder +notebooks/**/*.json +notebooks/**/*.yaml +notebooks/**/*.parquet +notebooks/**/*.pkl +notebooks/**/*.db + # Ignore vscode settings .vscode/ diff --git a/notebooks/02_orcabridge_basic_usage.ipynb b/notebooks/02_orcabridge_basic_usage.ipynb index 9b6b244..4c1f93a 100644 --- a/notebooks/02_orcabridge_basic_usage.ipynb +++ b/notebooks/02_orcabridge_basic_usage.ipynb @@ -803,9 +803,10 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "import tempfile\n", "import json\n", + "import tempfile\n", + "\n", + "import numpy as np\n", "\n", "\n", "def compute_stats(bin_file: PathLike, output_file=None):\n", diff --git a/notebooks/03_orcabridge_qol_features.ipynb b/notebooks/03_orcabridge_qol_features.ipynb index 38583c5..a28f686 100644 --- a/notebooks/03_orcabridge_qol_features.ipynb +++ b/notebooks/03_orcabridge_qol_features.ipynb @@ -100,11 +100,12 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.pod import function_pod\n", "import json\n", "import tempfile\n", "from pathlib import Path\n", "\n", + "from orcabridge.pod import function_pod\n", + "\n", "json_source = ob.GlobSource(\"json_file\", \"../examples/dataset2\", \"*.json\")\n", "\n", "\n", diff --git a/notebooks/04_orcabridge_tracker.ipynb b/notebooks/04_orcabridge_tracker.ipynb index f1b0b96..24fd052 100644 --- a/notebooks/04_orcabridge_tracker.ipynb +++ b/notebooks/04_orcabridge_tracker.ipynb @@ -23,10 +23,10 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.tracker import Tracker\n", + "from orcabridge.pod import function_pod\n", "from orcabridge.source import GlobSource\n", "from orcabridge.store import DirDataStore\n", - "from orcabridge.pod import function_pod" + "from orcabridge.tracker import Tracker" ] }, { @@ -62,9 +62,10 @@ "metadata": {}, "outputs": [], "source": [ + "import json\n", "import tempfile\n", "from pathlib import Path\n", - "import json\n", + "\n", "import yaml\n", "\n", "# use default data store location of `./pod_data`\n", diff --git a/notebooks/05_orcabridge_dj_integration.ipynb b/notebooks/05_orcabridge_dj_integration.ipynb index f843682..70cbeb8 100644 --- a/notebooks/05_orcabridge_dj_integration.ipynb +++ b/notebooks/05_orcabridge_dj_integration.ipynb @@ -35,14 +35,15 @@ "metadata": {}, "outputs": [], "source": [ - "from orcabridge.pod import function_pod\n", - "from orcabridge.source import GlobSource\n", - "from orcabridge.store import DirDataStore\n", + "import json\n", "import tempfile\n", "from pathlib import Path\n", - "import json\n", + "\n", "import yaml\n", "\n", + "from orcabridge.pod import function_pod\n", + "from orcabridge.source import GlobSource\n", + "from orcabridge.store import DirDataStore\n", "\n", "# define data source\n", "data_source = GlobSource(\n", @@ -162,9 +163,10 @@ } ], "source": [ - "from orcabridge.dj.tracker import QueryTracker\n", "import datajoint as dj\n", "\n", + "from orcabridge.dj.tracker import QueryTracker\n", + "\n", "schema = dj.schema(\"enigma_orcabridge_test\")\n", "\n", "\n", diff --git a/pyproject.toml b/pyproject.toml index c8f1179..fe1e914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,9 +8,13 @@ description = "Function-based Oracapod Pipeline implementation in Python" dynamic = ["version"] dependencies = [ "xxhash", - "networkx", - "typing_extensions", - "matplotlib>=3.10.3", + "networkx", + "typing_extensions", + "matplotlib>=3.10.3", + 
"pandas>=2.2.3", + "pyyaml>=6.0.2", + "pyarrow>=20.0.0", + "polars>=1.30.0", ] readme = "README.md" requires-python = ">=3.10" @@ -36,10 +40,13 @@ version_file = "src/orcabridge/_version.py" [dependency-groups] dev = [ + "deltalake>=1.0.2", "httpie>=3.2.4", "ipykernel>=6.29.5", + "pyiceberg>=0.9.1", "pytest>=8.3.5", "pytest-cov>=6.1.1", "redis>=6.2.0", "ruff>=0.11.11", + "tqdm>=4.67.1", ] diff --git a/src/orcabridge/__init__.py b/src/orcabridge/__init__.py index 675892a..6da00a9 100644 --- a/src/orcabridge/__init__.py +++ b/src/orcabridge/__init__.py @@ -1,15 +1,9 @@ -from . import hashing -from . import pod -from . import mapper -from . import stream -from . import source -from . import store -from .mapper import MapTags, MapPackets, Join, tag, packet +from . import hashing, mappers, pod, sources, store, streams +from .mappers import Join, MapPackets, MapTags, packet, tag from .pod import FunctionPod, function_pod -from .source import GlobSource +from .sources import GlobSource from .store import DirDataStore, SafeDirDataStore -from .tracker import GraphTracker - +from .pipeline import GraphTracker DEFAULT_TRACKER = GraphTracker() DEFAULT_TRACKER.activate() @@ -20,9 +14,9 @@ "store", "pod", "dir_data_store", - "mapper", - "stream", - "source", + "mappers", + "streams", + "sources", "MapTags", "MapPackets", "Join", diff --git a/src/orcabridge/base.py b/src/orcabridge/base.py index f9c57c4..6a73048 100644 --- a/src/orcabridge/base.py +++ b/src/orcabridge/base.py @@ -1,11 +1,14 @@ -from orcabridge.hashing import HashableMixin -from orcabridge.types import Tag, Packet -from typing import Any +# Collection of base classes for operations and streams in the orcabridge framework. import threading -from collections.abc import Collection, Callable, Iterator +from abc import ABC, abstractmethod +from collections.abc import Callable, Collection, Iterator +from typing import Any + +from orcabridge.hashing import HashableMixin +from orcabridge.types import Packet, Tag -class Operation(HashableMixin): +class Operation(ABC, HashableMixin): """ Operation defines a generic operation that can be performed on a stream of data. It is a base class for all operations that can be performed on a collection of streams @@ -79,10 +82,24 @@ def __str__(self): return f"{self.__class__.__name__}({self._label})" return self.__class__.__name__ + def claims_unique_tags( + self, *streams: "SyncStream", trigger_run: bool = True + ) -> bool: + """ + Returns True if the operation claims that it has unique tags, False otherwise. + This method is useful for checking if the operation can be used as a source + for other operations that require unique tags. + Subclasses should override this method if it can provide reasonable check/guarantee + of unique tags. The default implementation returns False, meaning that the operation + does not claim to have unique tags. + """ + return False + + @abstractmethod def forward(self, *streams: "SyncStream") -> "SyncStream": ... -class Tracker: +class Tracker(ABC): """ A tracker is a class that can track the invocations of operations. Only "active" trackers participate in tracking and its `record` method gets called on each invocation of an operation. @@ -124,9 +141,12 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, ext_tb): self.deactivate() + @abstractmethod def record(self, invocation: "Invocation") -> None: ... +# This is NOT an abstract class, but rather a concrete class that +# represents an invocation of an operation on a collection of streams. 
class Invocation(HashableMixin): """ This class represents an invocation of an operation on a collection of streams. @@ -138,6 +158,7 @@ class Invocation(HashableMixin): def __init__( self, operation: Operation, + # TODO: technically this should be Stream to stay consistent with Stream interface streams: Collection["SyncStream"], ) -> None: self.operation = operation @@ -171,8 +192,20 @@ def __lt__(self, other: Any) -> bool: # otherwise, order by the operation return hash(self.operation) < hash(other.operation) + def claims_unique_tags(self, trigger_run: bool = True) -> bool: + """ + Returns True if the invocation claims to have unique tags, False otherwise. + This method is useful for checking if the invocation can be used as a source + for other operations that require unique tags. False is returned if the + uniqueness of tags cannot be determined. + Note that uniqueness is best thought of as a "claim" by the operation + that it has unique tags. The actual uniqueness can only be verified + by iterating over the streams and checking the tags. + """ + return self.operation.claims_unique_tags(*self.streams, trigger_run=trigger_run) + -class Stream(HashableMixin): +class Stream(ABC, HashableMixin): """ A stream is a collection of tagged-packets that are generated by an operation. The stream is iterable and can be used to access the packets in the stream. @@ -242,6 +275,20 @@ def keys(self) -> tuple[Collection[str] | None, Collection[str] | None]: tag, packet = next(iter(self)) return list(tag.keys()), list(packet.keys()) + def claims_unique_tags(self) -> bool: + """ + Returns True if the stream has unique tags, False otherwise. + This method is useful for checking if the stream can be used as a source + for other operations that require unique tags. False is returned if the + uniqueness of tags cannot be determined. + If the stream is generated by an operation, the invocation is consulted for + the information about unique tags. + """ + if self.invocation is not None: + return self.invocation.claims_unique_tags() + return False + + @abstractmethod def __iter__(self) -> Iterator[tuple[Tag, Packet]]: raise NotImplementedError("Subclasses must implement __iter__ method") @@ -260,6 +307,29 @@ class SyncStream(Stream): will have to wait for the stream to finish before proceeding. """ + def claims_unique_tags(self, *, trigger_run=True) -> bool: + """ + For synchronous streams, if the stream is generated by an operation, the invocation + is consulted first to see if the uniqueness of tags can be determined without iterating over the stream. + If uniqueness cannot be determined from the invocation and trigger_run is True, uniqueness is checked + by iterating over all elements and verifying uniqueness. + Consequently, this may trigger upstream computations and can be expensive. + If trigger_run is False, the method falls back to the invocation's claim and does not iterate over the stream. + Since this consults the invocation, the resulting value is ultimately a claim and not a guarantee + of uniqueness. If a guarantee of uniqueness is required, use the has_unique_tags method. + """ + result = super().claims_unique_tags() + if result or not trigger_run: + return result + + # If uniqueness cannot be determined from the invocation, iterate over the stream + # (tags are dicts, so hash their items for the set membership check) + unique_tags = set() + for tag, _ in self: + if frozenset(tag.items()) in unique_tags: + return False + unique_tags.add(frozenset(tag.items())) + return True + def head(self, n: int = 5) -> None: """ Print the first n elements of the stream.
@@ -281,7 +351,7 @@ def __len__(self) -> int: return sum(1 for _ in self) def __rshift__( - self, transformer: Callable[["SyncStream"], "SyncStream"] + self, transformer: dict | Callable[["SyncStream"], "SyncStream"] ) -> "SyncStream": """ Returns a new stream that is the result of applying the mapping to the stream. @@ -289,19 +359,24 @@ def __rshift__( are returned in a new stream. """ # TODO: remove just in time import - from .mapper import MapPackets + from .mappers import MapPackets if isinstance(transformer, dict): return MapPackets(transformer)(self) elif isinstance(transformer, Callable): return transformer(self) + # Otherwise, do not know how to handle the transformer + raise TypeError( + "transformer must be a dictionary or a callable that takes a SyncStream" + ) + def __mul__(self, other: "SyncStream") -> "SyncStream": """ Returns a new stream that is the result joining with the other stream """ # TODO: remove just in time import - from .mapper import Join + from .mappers import Join if not isinstance(other, SyncStream): raise TypeError("other must be a SyncStream") @@ -321,6 +396,9 @@ class Source(Operation, SyncStream): type of Operation that takes no input and produces a stream of packets. For convenience, the source itself is also a stream and thus can be used as an input to other operations directly. + However, note that Source is still best thought of as an Operation that + produces a stream of packets, rather than a stream itself. On almost all occasions, + Source acts as an Operation. """ def __init__(self, label: str | None = None, **kwargs) -> None: diff --git a/src/orcabridge/dj/mapper.py b/src/orcabridge/dj/mapper.py index 79d7a6c..d3f2d69 100644 --- a/src/orcabridge/dj/mapper.py +++ b/src/orcabridge/dj/mapper.py @@ -1,8 +1,9 @@ -from .stream import QueryStream -from .operation import QueryOperation -from ..mapper import Mapper, Join, MapPackets, MapTags -from typing import Optional import warnings +from typing import Optional + +from orcabridge.mappers import Join, MapPackets, Mapper, MapTags +from .operation import QueryOperation +from .stream import QueryStream class QueryMapper(QueryOperation, Mapper): diff --git a/src/orcabridge/dj/operation.py b/src/orcabridge/dj/operation.py index 0259308..d4d5a81 100644 --- a/src/orcabridge/dj/operation.py +++ b/src/orcabridge/dj/operation.py @@ -1,5 +1,5 @@ -from .stream import QueryStream from ..base import Operation +from .stream import QueryStream class QueryOperation(Operation): diff --git a/src/orcabridge/dj/pod.py b/src/orcabridge/dj/pod.py index e278e3c..815b2dc 100644 --- a/src/orcabridge/dj/pod.py +++ b/src/orcabridge/dj/pod.py @@ -1,15 +1,16 @@ -from .stream import QueryStream, TableStream, TableCachedStream -from ..utils.name import pascal_to_snake, snake_to_pascal -from .operation import QueryOperation -from ..pod import Pod, FunctionPod -from .source import QuerySource -from .mapper import JoinQuery +import logging +from typing import Collection, Optional, Tuple + import datajoint as dj from datajoint import Schema -from typing import Collection, Tuple, Optional from datajoint.table import Table -import logging +from ..pod import FunctionPod, Pod +from ..utils.name import pascal_to_snake, snake_to_pascal +from .mapper import JoinQuery +from .operation import QueryOperation +from .source import QuerySource +from .stream import QueryStream, TableCachedStream, TableStream logger = logging.getLogger(__name__) @@ -37,9 +38,7 @@ def __init__( self.fp = fp self.schema = schema self.table_name = ( - table_name - if 
table_name is not None - else pascal_to_snake(fp.function.__name__) + table_name if table_name is not None else pascal_to_snake(fp.function_name) ) + (f"_{table_postfix}" if table_postfix else "") self.streams = streams if streams is not None else [] self.table = None @@ -57,7 +56,7 @@ def identity_structure(self, *streams): @property def label(self) -> str: if self._label is None: - return snake_to_pascal(self.fp.function.__name__) + return snake_to_pascal(self.fp.function_name) return self._label def prepare_source_query(self) -> Tuple[QueryStream, Collection[Table]]: diff --git a/src/orcabridge/dj/source.py b/src/orcabridge/dj/source.py index bde1ed4..cbcf0d7 100644 --- a/src/orcabridge/dj/source.py +++ b/src/orcabridge/dj/source.py @@ -1,15 +1,17 @@ -from ..source import Source -from .stream import QueryStream, TableCachedStream, TableStream -from .operation import QueryOperation -from ..stream import SyncStream -from datajoint import Table -from typing import Any, Collection, Union, Optional -from datajoint import Schema +import logging +from typing import Any, Collection, Optional, Union + import datajoint as dj +from datajoint import Schema, Table + +from orcabridge.hashing import hash_to_uuid + +from orcabridge.sources import Source +from orcabridge.streams import SyncStream from ..utils.name import pascal_to_snake, snake_to_pascal from ..utils.stream_utils import common_elements -import logging -from orcabridge.hashing import hash_to_uuid +from .operation import QueryOperation +from .stream import QueryStream, TableCachedStream, TableStream logger = logging.getLogger(__name__) diff --git a/src/orcabridge/dj/stream.py b/src/orcabridge/dj/stream.py index 1bbefe8..c8677f5 100644 --- a/src/orcabridge/dj/stream.py +++ b/src/orcabridge/dj/stream.py @@ -1,12 +1,11 @@ -from ..stream import SyncStream import copy - +import logging +from typing import Any, Collection, Union from datajoint.expression import QueryExpression from datajoint.table import Table -from typing import Collection, Any, Union -import logging +from orcabridge.streams import SyncStream logger = logging.getLogger(__name__) @@ -57,10 +56,9 @@ def __and__(self, other: Any) -> "QueryStream": """ Restrict the query stream by `other` and return a new query stream """ - from .mapper import RestrictQuery - # lazy load to avoid circular import - from ..source import TableSource + from .source import TableSource + from .mapper import RestrictQuery if isinstance(other, TableSource): other = other.table diff --git a/src/orcabridge/dj/tracker.py b/src/orcabridge/dj/tracker.py index 9be4eac..4e92273 100644 --- a/src/orcabridge/dj/tracker.py +++ b/src/orcabridge/dj/tracker.py @@ -1,20 +1,21 @@ -from orcabridge.tracker import GraphTracker -from datajoint import Schema -from typing import Collection, Tuple, Optional, Any +import sys +from collections import defaultdict from types import ModuleType -import networkx as nx +from typing import Any, Collection, Optional, Tuple +import networkx as nx +from datajoint import Schema from orcabridge.base import Operation, Source -from orcabridge.mapper import Mapper, Merge +from orcabridge.mappers import Mapper, Merge from orcabridge.pod import FunctionPod -from .stream import QueryStream -from .source import TableCachedSource, MergedQuerySource +from orcabridge.pipeline import GraphTracker + +from .mapper import convert_to_query_mapper from .operation import QueryOperation from .pod import TableCachedPod -from .mapper import convert_to_query_mapper -import sys -from collections import defaultdict 
+from .source import MergedQuerySource, TableCachedSource +from .stream import QueryStream def convert_to_query_operation( diff --git a/src/orcabridge/hashing/__init__.py b/src/orcabridge/hashing/__init__.py index d6809c0..2b1c5a4 100644 --- a/src/orcabridge/hashing/__init__.py +++ b/src/orcabridge/hashing/__init__.py @@ -1,19 +1,17 @@ -from .types import FileHasher, StringCacher, ObjectHasher - from .core import ( + HashableMixin, + function_content_hash, + get_function_signature, hash_file, - hash_pathset, + hash_function, hash_packet, + hash_pathset, hash_to_hex, hash_to_int, hash_to_uuid, - HashableMixin, - function_content_hash, - get_function_signature, - hash_function, ) - from .defaults import get_default_composite_hasher +from .types import FileHasher, ObjectHasher, StringCacher __all__ = [ "FileHasher", diff --git a/src/orcabridge/hashing/core.py b/src/orcabridge/hashing/core.py index 33e45b4..dd4f6a5 100644 --- a/src/orcabridge/hashing/core.py +++ b/src/orcabridge/hashing/core.py @@ -6,30 +6,32 @@ suitable for arbitrarily nested data structures and custom objects via HashableMixin. """ -from functools import partial import hashlib +import inspect import json import logging -from uuid import UUID +import zlib +from functools import partial +from os import PathLike +from pathlib import Path from typing import ( Any, - Dict, - Optional, - Union, + Callable, Collection, + Dict, + Literal, Mapping, - TypeVar, + Optional, Set, - Callable, - Literal, + TypeVar, + Union, ) -from pathlib import Path -from os import PathLike +from uuid import UUID + import xxhash -import zlib -from orcabridge.types import PathSet, Packet + +from orcabridge.types import Packet, PathSet from orcabridge.utils.name import find_noncolliding_name -import inspect # Configure logging with __name__ for proper hierarchy logger = logging.getLogger(__name__) @@ -174,7 +176,7 @@ def content_hash(self, char_count: Optional[int] = 16) -> str: if structure is None: logger.warning( f"HashableMixin.content_hash called on {self.__class__.__name__} " - "instance without identity_structure() implementation. " + "instance that returned identity_structure() of None. " "Using class name as default identity, which may not correctly reflect object uniqueness." ) # Fall back to class name for consistent behavior @@ -773,7 +775,10 @@ def hash_file(file_path, algorithm="sha256", buffer_size=65536) -> str: def get_function_signature( - func: Callable, include_defaults: bool = True, include_module: bool = True + func: Callable, + name_override: str | None = None, + include_defaults: bool = True, + include_module: bool = True, ) -> str: """ Get a stable string representation of a function's signature. 
@@ -796,7 +801,7 @@ def get_function_signature( parts.append(f"module:{func.__module__}") # Add function name - parts.append(f"name:{func.__name__}") + parts.append(f"name:{name_override or func.__name__}") # Add parameters param_strs = [] @@ -830,6 +835,7 @@ def _is_in_string(line, pos): def get_function_components( func: Callable, + name_override: str | None = None, include_name: bool = True, include_module: bool = True, include_declaration: bool = True, @@ -860,7 +866,7 @@ def get_function_components( # Add function name if include_name: - components.append(f"name:{func.__name__}") + components.append(f"name:{name_override or func.__name__}") # Add module if include_module and hasattr(func, "__module__"): @@ -913,7 +919,7 @@ def get_function_components( except (IOError, TypeError): # If source can't be retrieved, fall back to signature - components.append(f"name:{func.__name__}") + components.append(f"name:{name_override or func.__name__}") try: sig = inspect.signature(func) components.append(f"signature:{str(sig)}") @@ -982,6 +988,7 @@ def hash_function( function: Callable, function_hash_mode: Literal["content", "signature", "name"] = "content", return_type: Literal["hex", "int", "uuid"] = "hex", + name_override: Optional[str] = None, content_kwargs=None, hash_kwargs=None, ) -> Union[str, int, UUID]: @@ -996,7 +1003,7 @@ def hash_function( extractors: - "content": arguments for get_function_components - "signature": arguments for get_function_signature - - "name": no underlying function used - simply function.__name__ + - "name": no underlying function used - simply function.__name__ or name_override if provided hash_kwargs: Additional arguments for the hashing function that depends on the return type - "hex": arguments for hash_to_hex - "int": arguments for hash_to_int @@ -1016,14 +1023,19 @@ def hash_function( logger.debug( f"Hashing function '{function.__name__}' using mode '{function_hash_mode}'" + + (f" with name override '{name_override}'" if name_override else "") ) if function_hash_mode == "content": - hash_content = "\n".join(get_function_components(function, **content_kwargs)) + hash_content = "\n".join( + get_function_components( + function, name_override=name_override, **content_kwargs + ) + ) elif function_hash_mode == "signature": hash_content = get_function_signature(function, **content_kwargs) elif function_hash_mode == "name": - hash_content = function.__name__ + hash_content = name_override or function.__name__ else: err_msg = f"Unknown function_hash_mode: {function_hash_mode}" logger.error(err_msg) diff --git a/src/orcabridge/hashing/defaults.py b/src/orcabridge/hashing/defaults.py index fe463a5..2f65a7d 100644 --- a/src/orcabridge/hashing/defaults.py +++ b/src/orcabridge/hashing/defaults.py @@ -10,3 +10,9 @@ def get_default_composite_hasher(with_cache=True) -> CompositeHasher: string_cacher = InMemoryCacher(max_size=None) return HasherFactory.create_cached_composite(string_cacher) return HasherFactory.create_basic_composite() + + +def get_default_composite_hasher_with_cacher(cacher=None) -> CompositeHasher: + if cacher is None: + cacher = InMemoryCacher(max_size=None) + return HasherFactory.create_cached_composite(cacher) diff --git a/src/orcabridge/hashing/file_hashers.py b/src/orcabridge/hashing/file_hashers.py index d86e748..bf3365a 100644 --- a/src/orcabridge/hashing/file_hashers.py +++ b/src/orcabridge/hashing/file_hashers.py @@ -1,10 +1,10 @@ -from orcabridge.types import PathLike, PathSet, Packet -from orcabridge.hashing.core import hash_file, hash_pathset, 
hash_packet +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.types import ( FileHasher, PathSetHasher, StringCacher, ) +from orcabridge.types import Packet, PathLike, PathSet # Completely unnecessary to inherit from FileHasher, but this diff --git a/src/orcabridge/hashing/files.py b/src/orcabridge/hashing/files.py index 9f35a5f..3a70b9d 100644 --- a/src/orcabridge/hashing/files.py +++ b/src/orcabridge/hashing/files.py @@ -1,8 +1,9 @@ -from orcabridge.types import PathLike, PathSet, Packet +import threading from typing import Optional -from orcabridge.hashing.core import hash_file, hash_pathset, hash_packet + +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.types import FileHasher, StringCacher -import threading +from orcabridge.types import Packet, PathLike, PathSet # Completely unnecessary to inherit from FileHasher, but this diff --git a/src/orcabridge/hashing_legacy.py b/src/orcabridge/hashing/hashing_legacy.py similarity index 98% rename from src/orcabridge/hashing_legacy.py rename to src/orcabridge/hashing/hashing_legacy.py index 2e325bb..353a4f9 100644 --- a/src/orcabridge/hashing_legacy.py +++ b/src/orcabridge/hashing/hashing_legacy.py @@ -42,11 +42,11 @@ # def hash_function(function, function_hash_mode: str = "content", hasher_kwargs=None) -> str: # """ # Hash a function based on its content, signature, or name. - +# # Args: # function: The function to hash # function_hash_mode: The mode of hashing ('content', 'signature', 'name') -# store_name: Optional name for the store +# function_name: Optional name for the function (if not provided, uses function's __name__) # Returns: # A string representing the hash of the function diff --git a/src/orcabridge/hashing/string_cachers.py b/src/orcabridge/hashing/string_cachers.py index 75fb91e..817aa44 100644 --- a/src/orcabridge/hashing/string_cachers.py +++ b/src/orcabridge/hashing/string_cachers.py @@ -4,7 +4,7 @@ import sqlite3 import threading from pathlib import Path -from typing import Any, TYPE_CHECKING +from typing import TYPE_CHECKING, Any from orcabridge.hashing.types import StringCacher @@ -22,6 +22,60 @@ redis = None +class TransferCacher(StringCacher): + """ + Takes two string cachers as source and destination. Everytime a cached value is retrieved from source, + the value is also set in the destination cacher. + This is useful for transferring cached values between different caching mechanisms. + """ + + def __init__(self, source: StringCacher, destination: StringCacher): + """ + Initialize the TransferCacher. + + Args: + source: The source cacher to read from + destination: The destination cacher to write to + """ + self.source = source + self.destination = destination + + def transfer(self, cache_key: str) -> str | None: + """ + Transfer a cached value from source to destination. 
+ + Args: + cache_key: The key to transfer + + Returns: + The cached value if found, otherwise None + """ + # Try to get the cached value from the source + value = self.source.get_cached(cache_key) + if value is not None: + # Set it in the destination cacher + self.destination.set_cached(cache_key, value) + return value + + def get_cached(self, cache_key: str) -> str | None: + # try to get the cached value from the destination first + value = self.destination.get_cached(cache_key) + if value is not None: + return value + # if not found in destination, get it from source + value = self.source.get_cached(cache_key) + if value is not None: + self.destination.set_cached(cache_key, value) + return value + + def set_cached(self, cache_key: str, value: str) -> None: + # Only set the value in the destination cacher + self.destination.set_cached(cache_key, value) + + def clear_cache(self) -> None: + self.destination.clear_cache() + + class InMemoryCacher(StringCacher): """Thread-safe in-memory LRU cache.""" @@ -628,7 +682,8 @@ def get_cached(self, cache_key: str) -> str | None: result = self.redis.get(self._get_prefixed_key(cache_key)) if result is None: return None - + logger.info(f"Retrieved cached value from Redis for key {cache_key}") + # Decode bytes to string if necessary if isinstance(result, bytes): return result.decode("utf-8") @@ -648,6 +703,8 @@ def set_cached(self, cache_key: str, value: str) -> None: return try: + logger.info(f"Saving cached value to Redis for key {cache_key}") + self.redis.set(self._get_prefixed_key(cache_key), value) except (redis.RedisError, redis.ConnectionError) as e: diff --git a/src/orcabridge/hashing/types.py b/src/orcabridge/hashing/types.py index f0b9ce4..6dda6c0 100644 --- a/src/orcabridge/hashing/types.py +++ b/src/orcabridge/hashing/types.py @@ -1,8 +1,9 @@ """Hash strategy protocols for dependency injection.""" from abc import ABC, abstractmethod -from typing import Protocol, Any, runtime_checkable +from typing import Any, Protocol, runtime_checkable from uuid import UUID + from orcabridge.types import Packet, PathLike, PathSet @@ -19,7 +20,7 @@ def identity_structure(self) -> Any: Should be deterministic and include all identity-relevant data. Return None to indicate no custom identity is available. """ - ... 
+ pass  # pragma: no cover class ObjectHasher(ABC): diff --git a/src/orcabridge/mapper.py b/src/orcabridge/mappers.py similarity index 80% rename from src/orcabridge/mapper.py rename to src/orcabridge/mappers.py index 81cdb60..4ced7ee 100644 --- a/src/orcabridge/mapper.py +++ b/src/orcabridge/mappers.py @@ -1,15 +1,21 @@ -from orcabridge.base import SyncStream, Mapper -from orcabridge.stream import SyncStreamFromGenerator +from collections import defaultdict +from collections.abc import Callable, Collection, Iterator +from itertools import chain +from typing import Any + + +from orcabridge.base import Mapper, SyncStream +from orcabridge.hashing import function_content_hash, hash_function +from orcabridge.streams import SyncStreamFromGenerator from orcabridge.utils.stream_utils import ( - join_tags, - check_packet_compatibility, - batch_tag, batch_packet, + batch_tags, + check_packet_compatibility, + join_tags, ) -from orcabridge.hashing import hash_function, function_content_hash -from .types import Tag, Packet -from itertools import chain -from collections.abc import Collection, Iterator, Callable +from orcabridge.utils.stream_utils import fill_missing + +from .types import Packet, Tag class Repeat(Mapper): @@ -20,6 +26,10 @@ class Repeat(Mapper): def __init__(self, repeat_count: int) -> None: super().__init__() + if not isinstance(repeat_count, int): + raise TypeError("repeat_count must be an integer") + if repeat_count < 0: + raise ValueError("repeat_count must be non-negative") self.repeat_count = repeat_count def identity_structure(self, *streams) -> tuple[str, int, set[SyncStream]]: @@ -54,15 +64,18 @@ def generator() -> Iterator[tuple[Tag, Packet]]: def __repr__(self) -> str: return f"Repeat(count={self.repeat_count})" + def claims_unique_tags( + self, *streams: SyncStream, trigger_run: bool = True + ) -> bool: + if len(streams) != 1: + raise ValueError( + "Repeat operation only supports operating on a single input stream" + ) -def fill_missing(dict, keys, default=None): - """ - Fill the missing keys in the dictionary with the specified default value. - """ - for key in keys: - if key not in dict: - dict[key] = default - return dict + # Repeat's uniqueness claim holds only if (1) the input stream has unique tags and (2) the repeat count is 1 + return self.repeat_count == 1 and streams[0].claims_unique_tags( + trigger_run=trigger_run + ) class Merge(Mapper): @@ -106,6 +119,32 @@ def generator() -> Iterator[tuple[Tag, Packet]]: def __repr__(self) -> str: return "Merge()" + def claims_unique_tags( + self, *streams: SyncStream, trigger_run: bool = True + ) -> bool: + """ + Merge operation can only claim unique tags if all input streams have unique tags AND + the tag keys are not identical across all streams.
+ """ + if len(streams) < 2: + raise ValueError("Merge operation requires at least two streams") + # Check if all streams have unique tags + unique_tags = all( + stream.claims_unique_tags(trigger_run=trigger_run) for stream in streams + ) + if not unique_tags: + return False + # check that all streams' tag keys are not identical + tag_key_pool = set() + for stream in streams: + tag_keys, packet_keys = stream.keys() + # TODO: re-evaluate the implication of having empty tag keys in uniqueness guarantee + if tag_keys is None or set(tag_keys) in tag_key_pool: + return False + tag_key_pool.add(frozenset(tag_keys)) + + return True + class Join(Mapper): def identity_structure(self, *streams): @@ -499,7 +538,7 @@ def __init__( super().__init__() self.batch_size = batch_size if tag_processor is None: - tag_processor = lambda tags: batch_tag(tags) # noqa: E731 + tag_processor = batch_tags # noqa: E731 self.tag_processor = tag_processor self.drop_last = drop_last @@ -552,6 +591,74 @@ def identity_structure(self, *streams): ) + tuple(streams) +class GroupBy(Mapper): + def __init__( + self, + group_keys: Collection[str] | None = None, + reduce_keys: bool = False, + selection_function: Callable[[Collection[tuple[Tag, Packet]]], Collection[bool]] + | None = None, + ) -> None: + super().__init__() + self.group_keys = group_keys + self.reduce_keys = reduce_keys + self.selection_function = selection_function + + def identity_structure(self, *streams: SyncStream) -> Any: + struct = (self.__class__.__name__, self.group_keys, self.reduce_keys) + if self.selection_function is not None: + struct += (hash_function(self.selection_function),) + return struct + tuple(streams) + + def forward(self, *streams: SyncStream) -> SyncStream: + if len(streams) != 1: + raise ValueError("GroupBy operation requires exactly one stream") + + stream = streams[0] + stream_keys, packet_keys = stream.keys() + stream_keys = stream_keys or [] + packet_keys = packet_keys or [] + group_keys = self.group_keys if self.group_keys is not None else stream_keys + + def generator() -> Iterator[tuple[Tag, Packet]]: + # step through all packets in the stream and group them by the specified keys + grouped_packets: dict[tuple, list[tuple[Tag, Packet]]] = defaultdict(list) + for tag, packet in stream: + key = tuple(tag.get(key, None) for key in group_keys) + grouped_packets[key].append((tag, packet)) + + for key, packets in grouped_packets.items(): + if self.selection_function is not None: + # apply the selection function to the grouped packets + selected_packets = self.selection_function(packets) + packets = [ + p for p, selected in zip(packets, selected_packets) if selected + ] + + if not packets: + continue + + # create a new tag that combines the group keys + # if reduce_keys is True, we only keep the group keys as a singular value + new_tag = {} + if self.reduce_keys: + new_tag = {k: key[i] for i, k in enumerate(group_keys)} + remaining_keys = set(stream_keys) - set(group_keys) + else: + remaining_keys = set(stream_keys) | set(group_keys) + # for remaining keys return list of tag values + for k in remaining_keys: + if k not in new_tag: + new_tag[k] = [t.get(k, None) for t, _ in packets] + # combine all packets into a single packet + combined_packet = { + k: [p.get(k, None) for _, p in packets] for k in packet_keys + } + yield new_tag, combined_packet + + return SyncStreamFromGenerator(generator) + + class CacheStream(Mapper): """ A Mapper that caches the packets in the stream, thus avoiding upstream recomputation. 
diff --git a/src/orcabridge/pod.py b/src/orcabridge/pod.py index 094842f..4caf774 100644 --- a/src/orcabridge/pod.py +++ b/src/orcabridge/pod.py @@ -1,32 +1,37 @@ +import functools +import logging +import pickle +import warnings +from abc import abstractmethod +import sys +from collections.abc import Callable, Collection, Iterable, Iterator from typing import ( - Literal, Any, + Literal, ) -from collections.abc import Collection, Iterator -from orcabridge.types import Tag, Packet, PodFunction, PathSet -from orcabridge.hashing import hash_function, get_function_signature + from orcabridge.base import Operation -from orcabridge.stream import SyncStream, SyncStreamFromGenerator -from orcabridge.mapper import Join +from orcabridge.hashing import get_function_signature, hash_function +from orcabridge.mappers import Join from orcabridge.store import DataStore, NoOpDataStore -import functools -import warnings -import logging +from orcabridge.streams import SyncStream, SyncStreamFromGenerator +from orcabridge.types import Packet, PathSet, PodFunction, Tag logger = logging.getLogger(__name__) def function_pod( output_keys: Collection[str] | None = None, - store_name: str | None = None, + function_name: str | None = None, data_store: DataStore | None = None, + store_name: str | None = None, function_hash_mode: Literal["signature", "content", "name", "custom"] = "name", custom_hash: int | None = None, force_computation: bool = False, skip_memoization: bool = False, error_handling: Literal["raise", "ignore", "warn"] = "raise", **kwargs, -): +) -> Callable[..., "FunctionPod"]: """ Decorator that wraps a function in a FunctionPod instance. @@ -39,13 +44,32 @@ FunctionPod instance wrapping the decorated function """ - def decorator(func): - # Create a FunctionPod instance with the function and parameters + def decorator(func) -> FunctionPod: + if func.__name__ == "<lambda>": + raise ValueError("Lambda functions cannot be used with function_pod") + + if not hasattr(func, "__module__") or func.__module__ is None: + raise ValueError( + f"Function {func.__name__} must be defined at module level" + ) + + # Store the original function in the module for pickling purposes + # and make sure to change the name of the function + module = sys.modules[func.__module__] + base_function_name = func.__name__ + new_function_name = f"_original_{func.__name__}" + setattr(module, new_function_name, func) + # rename the function to be consistent and make it pickleable + setattr(func, "__name__", new_function_name) + setattr(func, "__qualname__", new_function_name) + + # Create the FunctionPod pod = FunctionPod( function=func, output_keys=output_keys, - store_name=store_name, + function_name=function_name or base_function_name, data_store=data_store, + store_name=store_name, function_hash_mode=function_hash_mode, custom_hash=custom_hash, force_computation=force_computation, @@ -54,9 +78,6 @@ def decorator(func): **kwargs, ) - # Update the metadata to make the pod look more like the original function - functools.update_wrapper(pod, func) - return pod return decorator @@ -64,7 +85,7 @@ def decorator(func): class Pod(Operation): """ - A base class for all pods. A pod can be seen as a special type of operation that + An (abstract) base class for all pods. A pod can be seen as a special type of operation that only operates on the packet content without reading tags. Consequently, no operation of Pod can dependent on the tags of the packets.
This is a design choice to ensure that the pods act as pure functions which is a necessary condition to guarantee reproducibility. @@ -89,26 +110,30 @@ def __call__(self, *streams: SyncStream, **kwargs) -> SyncStream: stream = self.process_stream(*streams) return super().__call__(*stream, **kwargs) - def forward(self, *streams: SyncStream) -> SyncStream: ... - - def process(self, packet: Packet) -> Packet: ... - # TODO: reimplement the memoization as dependency injection class FunctionPod(Pod): + """ + A pod that wraps a function and allows it to be used as an operation in a stream. + This pod can be used to apply a function to the packets in a stream, with optional memoization + and caching of results. It can also handle multiple output keys and error handling. + The function should accept keyword arguments that correspond to the keys in the packets. + The output of the function should be a path or a collection of paths that correspond to the output keys.""" + def __init__( self, function: PodFunction, output_keys: Collection[str] | None = None, - store_name=None, + function_name=None, data_store: DataStore | None = None, + store_name: str | None = None, function_hash_mode: Literal["signature", "content", "name", "custom"] = "name", custom_hash: int | None = None, label: str | None = None, force_computation: bool = False, - skip_cache_lookup: bool = False, + skip_memoization_lookup: bool = False, skip_memoization: bool = False, error_handling: Literal["raise", "ignore", "warn"] = "raise", _hash_function_kwargs: dict | None = None, @@ -116,23 +141,22 @@ def __init__( ) -> None: super().__init__(label=label, **kwargs) self.function = function - if output_keys is None: - output_keys = [] - self.output_keys = output_keys - if store_name is None: + self.output_keys = output_keys or [] + if function_name is None: if hasattr(self.function, "__name__"): - store_name = getattr(self.function, "__name__") + function_name = getattr(self.function, "__name__") else: raise ValueError( - "store_name must be provided if function has no __name__ attribute" + "function_name must be provided if function has no __name__ attribute" ) - self.store_name = store_name + self.function_name = function_name self.data_store = data_store if data_store is not None else NoOpDataStore() + self.store_name = store_name or function_name self.function_hash_mode = function_hash_mode self.custom_hash = custom_hash self.force_computation = force_computation - self.skip_cache_lookup = skip_cache_lookup + self.skip_memoization_lookup = skip_memoization_lookup self.skip_memoization = skip_memoization self.error_handling = error_handling self._hash_function_kwargs = _hash_function_kwargs @@ -148,6 +172,36 @@ def keys( tag_keys, _ = stream[0].keys() return tag_keys, tuple(self.output_keys) + def is_memoized(self, packet: Packet) -> bool: + return self.retrieve_memoized(packet) is not None + + def retrieve_memoized(self, packet: Packet) -> Packet | None: + """ + Retrieve a memoized packet from the data store. + Returns None if no memoized packet is found. + """ + return self.data_store.retrieve_memoized( + self.store_name, + self.content_hash(char_count=16), + packet, + ) + + def memoize( + self, + packet: Packet, + output_packet: Packet, + ) -> Packet: + """ + Memoize the output packet in the data store. + Returns the memoized packet. 
+ """ + return self.data_store.memoize( + self.store_name, + self.content_hash(char_count=16), # identity of this function pod + packet, + output_packet, + ) + def forward(self, *streams: SyncStream) -> SyncStream: # if multiple streams are provided, join them if len(streams) > 1: @@ -161,12 +215,8 @@ def generator() -> Iterator[tuple[Tag, Packet]]: for tag, packet in stream: output_values: list["PathSet"] = [] try: - if not self.skip_cache_lookup: - memoized_packet = self.data_store.retrieve_memoized( - self.store_name, - self.content_hash(char_count=16), - packet, - ) + if not self.skip_memoization_lookup: + memoized_packet = self.retrieve_memoized(packet) else: memoized_packet = None if not self.force_computation and memoized_packet is not None: @@ -176,14 +226,10 @@ def generator() -> Iterator[tuple[Tag, Packet]]: values = self.function(**packet) if len(self.output_keys) == 0: - output_values: list["PathSet"] = [] - elif ( - len(self.output_keys) == 1 - and values is not None - and not isinstance(values, Collection) - ): - output_values = [values] - elif isinstance(values, Collection): + output_values = [] + elif len(self.output_keys) == 1: + output_values = [values] # type: ignore + elif isinstance(values, Iterable): output_values = list(values) # type: ignore elif len(self.output_keys) > 1: raise ValueError( @@ -192,7 +238,7 @@ def generator() -> Iterator[tuple[Tag, Packet]]: if len(output_values) != len(self.output_keys): raise ValueError( - "Number of output keys does not match number of values returned by function" + f"Number of output keys {len(self.output_keys)}:{self.output_keys} does not match number of values returned by function {len(output_values)}" ) except Exception as e: logger.error(f"Error processing packet {packet}: {e}") @@ -211,12 +257,7 @@ def generator() -> Iterator[tuple[Tag, Packet]]: if not self.skip_memoization: # output packet may be modified by the memoization process # e.g. 
if the output is a file, the path may be changed - output_packet = self.data_store.memoize( - self.store_name, - self.content_hash(), # identity of this function pod - packet, - output_packet, - ) + output_packet = self.memoize(packet, output_packet) # type: ignore n_computed += 1 logger.info(f"Computed item {n_computed}") @@ -235,18 +276,21 @@ def identity_structure(self, *streams) -> Any: } function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="content", content_kwargs=content_kwargs, ) elif self.function_hash_mode == "signature": function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="signature", content_kwargs=content_kwargs, ) elif self.function_hash_mode == "name": function_hash_value = hash_function( self.function, + name_override=self.function_name, function_hash_mode="name", content_kwargs=content_kwargs, ) diff --git a/src/orcabridge/source.py b/src/orcabridge/sources.py similarity index 80% rename from src/orcabridge/source.py rename to src/orcabridge/sources.py index d3273f0..71758bd 100644 --- a/src/orcabridge/source.py +++ b/src/orcabridge/sources.py @@ -1,15 +1,12 @@ -from orcabridge.types import Tag, Packet -from orcabridge.hashing import hash_function -from orcabridge.base import Source -from orcabridge.stream import SyncStream, SyncStreamFromGenerator -from typing import Any, Literal +from collections.abc import Callable, Collection, Iterator from os import PathLike from pathlib import Path -from collections.abc import Collection, Iterator, Callable - +from typing import Any, Literal -class LoadFromSource(Source): - pass +from orcabridge.base import Source +from orcabridge.hashing import hash_function +from orcabridge.streams import SyncStream, SyncStreamFromGenerator +from orcabridge.types import Packet, Tag class GlobSource(Source): @@ -43,31 +40,32 @@ class GlobSource(Source): ... lambda f: {'date': Path(f).stem[:8]}) """ - default_tag_function = lambda f: {"file_name": Path(f).stem} # noqa: E731 + @staticmethod + def default_tag_function(f: PathLike) -> Tag: + return {"file_name": Path(f).stem} # noqa: E731 def __init__( self, name: str, file_path: PathLike, pattern: str = "*", + absolute_path: bool = False, label: str | None = None, - tag_function: str | Callable[[PathLike], Tag] | None = None, + tag_function: Callable[[PathLike], Tag] | None = None, tag_function_hash_mode: Literal["content", "signature", "name"] = "name", expected_tag_keys: Collection[str] | None = None, **kwargs, ) -> None: super().__init__(label=label, **kwargs) self.name = name + file_path = Path(file_path) + if absolute_path: + file_path = file_path.resolve() self.file_path = file_path self.pattern = pattern self.expected_tag_keys = expected_tag_keys - if self.expected_tag_keys is None and isinstance(tag_function, str): - self.expected_tag_keys = [tag_function] if tag_function is None: tag_function = self.__class__.default_tag_function - elif isinstance(tag_function, str): - tag_key = tag_function - tag_function = lambda f: {tag_key: Path(f).stem} # noqa: E731 self.tag_function: Callable[[PathLike], Tag] = tag_function self.tag_function_hash_mode = tag_function_hash_mode @@ -127,3 +125,17 @@ def identity_structure(self, *streams) -> Any: self.pattern, tag_function_hash, ) + tuple(streams) + + def claims_unique_tags( + self, *streams: "SyncStream", trigger_run: bool = True + ) -> bool: + if len(streams) != 0: + raise ValueError( + "GlobSource does not support forwarding streams. 
" + "It generates its own stream from the file system." + ) + # Claim uniqueness only if the default tag function is used + if self.tag_function == self.__class__.default_tag_function: + return True + # Otherwise, delegate to the base class + return super().claims_unique_tags(trigger_run=trigger_run) diff --git a/src/orcabridge/store/__init__.py b/src/orcabridge/store/__init__.py index 9c84ab5..66a68df 100644 --- a/src/orcabridge/store/__init__.py +++ b/src/orcabridge/store/__init__.py @@ -1,4 +1,4 @@ -from .dir_data_store import DirDataStore, NoOpDataStore, DataStore +from .core import DataStore, DirDataStore, NoOpDataStore from .safe_dir_data_store import SafeDirDataStore __all__ = [ diff --git a/src/orcabridge/store/dir_data_store.py b/src/orcabridge/store/core.py similarity index 85% rename from src/orcabridge/store/dir_data_store.py rename to src/orcabridge/store/core.py index 46966da..89fe85e 100644 --- a/src/orcabridge/store/dir_data_store.py +++ b/src/orcabridge/store/core.py @@ -1,41 +1,35 @@ -from orcabridge.types import Packet -from typing import Optional +import json +import logging +import shutil +from os import PathLike from pathlib import Path + from orcabridge.hashing import hash_packet from orcabridge.hashing.defaults import get_default_composite_hasher from orcabridge.hashing.types import PacketHasher -import shutil -import logging -import json -from os import PathLike +from orcabridge.store.types import DataStore +from orcabridge.types import Packet logger = logging.getLogger(__name__) -class DataStore: - def memoize( - self, - store_name: str, - content_hash: str, - packet: Packet, - output_packet: Packet, - ) -> Packet: ... - - def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet - ) -> Optional[Packet]: ... - - class NoOpDataStore(DataStore): """ An empty data store that does not store anything. This is useful for testing purposes or when no memoization is needed. """ + def __init__(self): + """ + Initialize the NoOpDataStore. + This does not require any parameters. 
+ """ + pass + def memoize( self, - store_name: str, - content_hash: str, + function_name: str, + function_hash: str, packet: Packet, output_packet: Packet, overwrite: bool = False, @@ -43,8 +37,8 @@ def memoize( return output_packet def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet - ) -> Optional[Packet]: + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: return None @@ -67,7 +61,7 @@ def __init__( self.preserve_filename = preserve_filename self.overwrite = overwrite self.supplement_source = supplement_source - if packet_hasher is None: + if packet_hasher is None and not legacy_mode: packet_hasher = get_default_composite_hasher(with_cache=True) self.packet_hasher = packet_hasher self.legacy_mode = legacy_mode @@ -75,8 +69,8 @@ def __init__( def memoize( self, - store_name: str, - content_hash: str, + function_name: str, + function_hash: str, packet: Packet, output_packet: Packet, ) -> Packet: @@ -84,7 +78,7 @@ def memoize( packet_hash = hash_packet(packet, algorithm=self.legacy_algorithm) else: packet_hash = self.packet_hasher.hash_packet(packet) - output_dir = self.store_dir / store_name / content_hash / str(packet_hash) + output_dir = self.store_dir / function_name / function_hash / str(packet_hash) info_path = output_dir / "_info.json" source_path = output_dir / "_source.json" @@ -138,20 +132,20 @@ def memoize( # retrieve back the memoized packet and return # TODO: consider if we want to return the original packet or the memoized one retrieved_output_packet = self.retrieve_memoized( - store_name, content_hash, packet + function_name, function_hash, packet ) if retrieved_output_packet is None: raise ValueError(f"Memoized packet {packet} not found after storing it") return retrieved_output_packet def retrieve_memoized( - self, store_name: str, content_hash: str, packet: Packet + self, function_name: str, function_hash: str, packet: Packet ) -> Packet | None: if self.legacy_mode: packet_hash = hash_packet(packet, algorithm=self.legacy_algorithm) else: packet_hash = self.packet_hasher.hash_packet(packet) - output_dir = self.store_dir / store_name / content_hash / str(packet_hash) + output_dir = self.store_dir / function_name / function_hash / str(packet_hash) info_path = output_dir / "_info.json" source_path = output_dir / "_source.json" @@ -183,11 +177,11 @@ def retrieve_memoized( logger.info(f"No memoized output found for packet {packet}") return None - def clear_store(self, store_name: str) -> None: + def clear_store(self, function_name: str) -> None: # delete the folder self.data_dir and its content - shutil.rmtree(self.store_dir / store_name) + shutil.rmtree(self.store_dir / function_name) - def clear_all_stores(self, interactive=True, store_name="", force=False) -> None: + def clear_all_stores(self, interactive=True, function_name="", force=False) -> None: """ Clear all stores in the data directory. This is a dangerous operation -- please double- and triple-check before proceeding! @@ -197,10 +191,10 @@ def clear_all_stores(self, interactive=True, store_name="", force=False) -> None If False, it will delete only if `force=True`. The user will be prompted to type in the full name of the storage (as shown in the prompt) to confirm deletion. - store_name (str): The name of the store to delete. If not using interactive mode, + function_name (str): The name of the function to delete. If not using interactive mode, this must be set to the store_dir path in order to proceed with the deletion. 
force (bool): If True, delete the store without prompting the user for confirmation. - If False and interactive is False, the `store_name` must match the store_dir + If False and interactive is False, the `function_name` must match the store_dir for the deletion to proceed. """ # delete the folder self.data_dir and its content @@ -212,14 +206,14 @@ def clear_all_stores(self, interactive=True, store_name="", force=False) -> None if confirm.lower() != "y": logger.info("Aborting deletion of all stores") return - store_name = input( - f"Type in the store name {self.store_dir} to confirm the deletion: " + function_name = input( + f"Type in the function name {self.store_dir} to confirm the deletion: " ) - if store_name != str(self.store_dir): + if function_name != str(self.store_dir): logger.info("Aborting deletion of all stores") return - if not force and store_name != str(self.store_dir): + if not force and function_name != str(self.store_dir): logger.info(f"Aborting deletion of all stores in {self.store_dir}") return diff --git a/src/orcabridge/store/file_ops.py b/src/orcabridge/store/file_ops.py index 13c98a6..33675a0 100644 --- a/src/orcabridge/store/file_ops.py +++ b/src/orcabridge/store/file_ops.py @@ -1,8 +1,9 @@ # file_ops.py - Atomic file operations module -import os import logging +import os from pathlib import Path + from orcabridge.types import PathLike logger = logging.getLogger(__name__) diff --git a/src/orcabridge/store/safe_dir_data_store.py b/src/orcabridge/store/safe_dir_data_store.py index 03d45b2..548039f 100644 --- a/src/orcabridge/store/safe_dir_data_store.py +++ b/src/orcabridge/store/safe_dir_data_store.py @@ -1,16 +1,16 @@ # safedirstore.py - SafeDirDataStore implementation -import os -import time +import errno +import fcntl import json import logging -import fcntl -import errno -from pathlib import Path +import os +import time from contextlib import contextmanager +from pathlib import Path from typing import Optional, Union -from .file_ops import atomic_write, atomic_copy +from .file_ops import atomic_copy, atomic_write logger = logging.getLogger(__name__) @@ -203,16 +203,16 @@ def __init__( # Create the data directory if it doesn't exist self.store_dir.mkdir(parents=True, exist_ok=True) - def _get_output_dir(self, store_name, content_hash, packet): + def _get_output_dir(self, function_name, content_hash, packet): """Get the output directory for a specific packet""" from orcabridge.hashing.core import hash_dict packet_hash = hash_dict(packet) - return self.store_dir / store_name / content_hash / str(packet_hash) + return self.store_dir / function_name / content_hash / str(packet_hash) def memoize( self, - store_name: str, + function_name: str, content_hash: str, packet: dict, output_packet: dict, @@ -222,7 +222,7 @@ def memoize( Uses file locking to ensure thread safety and process safety. 
Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet output_packet: Output packet to memoize @@ -234,7 +234,7 @@ def memoize( FileLockError: If the lock cannot be acquired ValueError: If the entry already exists and overwrite is False """ - output_dir = self._get_output_dir(store_name, content_hash, packet) + output_dir = self._get_output_dir(function_name, content_hash, packet) info_path = output_dir / "_info.json" lock_path = output_dir / "_lock" completion_marker = output_dir / "_complete" @@ -247,7 +247,7 @@ def memoize( with file_lock(lock_path, shared=True, timeout=self.lock_timeout): if completion_marker.exists() and not self.overwrite: logger.info(f"Entry already exists for packet {packet}") - return self.retrieve_memoized(store_name, content_hash, packet) + return self.retrieve_memoized(function_name, content_hash, packet) except FileLockError: logger.warning("Could not acquire shared lock to check completion status") # Continue to try with exclusive lock @@ -264,7 +264,7 @@ def memoize( logger.info( f"Entry already exists for packet {packet} (verified with exclusive lock)" ) - return self.retrieve_memoized(store_name, content_hash, packet) + return self.retrieve_memoized(function_name, content_hash, packet) # Check for partial results and clean up if necessary partial_marker = output_dir / "_partial" @@ -320,7 +320,7 @@ def memoize( # Retrieve the memoized packet to ensure consistency # We don't need to acquire a new lock since we already have an exclusive lock return self._retrieve_without_lock( - store_name, content_hash, packet, output_dir + function_name, content_hash, packet, output_dir ) finally: @@ -329,7 +329,7 @@ def memoize( partial_marker.unlink(missing_ok=True) def retrieve_memoized( - self, store_name: str, content_hash: str, packet: dict + self, function_name: str, content_hash: str, packet: dict ) -> Optional[dict]: """ Retrieve a memoized output packet. @@ -337,7 +337,7 @@ def retrieve_memoized( Uses a shared lock to allow concurrent reads while preventing writes during reads. Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet @@ -345,21 +345,21 @@ def retrieve_memoized( The memoized output packet with paths adjusted to absolute paths, or None if the packet is not found """ - output_dir = self._get_output_dir(store_name, content_hash, packet) + output_dir = self._get_output_dir(function_name, content_hash, packet) lock_path = output_dir / "_lock" # Use a shared lock for reading to allow concurrent reads try: with file_lock(lock_path, shared=True, timeout=self.lock_timeout): return self._retrieve_without_lock( - store_name, content_hash, packet, output_dir + function_name, content_hash, packet, output_dir ) except FileLockError: logger.warning(f"Could not acquire shared lock to read {output_dir}") return None def _retrieve_without_lock( - self, store_name: str, content_hash: str, packet: dict, output_dir: Path + self, function_name: str, content_hash: str, packet: dict, output_dir: Path ) -> Optional[dict]: """ Helper to retrieve a memoized packet without acquiring a lock. @@ -367,7 +367,7 @@ def _retrieve_without_lock( This is used internally when we already have a lock. 
Args: - store_name: Name of the store + function_name: Name of the function content_hash: Hash of the function/operation packet: Input packet output_dir: Directory containing the output @@ -412,16 +412,16 @@ def _retrieve_without_lock( logger.error(f"Error loading memoized output for packet {packet}: {e}") return None - def clear_store(self, store_name: str) -> None: + def clear_store(self, function_name: str) -> None: """ Clear a specific store. Args: - store_name: Name of the store to clear + function_name: Name of the function to clear """ import shutil - store_path = self.store_dir / store_name + store_path = self.store_dir / function_name if store_path.exists(): shutil.rmtree(store_path) @@ -433,24 +433,24 @@ def clear_all_stores(self) -> None: shutil.rmtree(self.store_dir) self.store_dir.mkdir(parents=True, exist_ok=True) - def clean_stale_data(self, store_name=None, max_age=86400): + def clean_stale_data(self, function_name=None, max_age=86400): """ Clean up stale data in the store. Args: - store_name: Optional name of the store to clean, or None for all stores + function_name: Optional name of the function to clean, or None for all functions max_age: Maximum age of data in seconds before it's considered stale """ import shutil - if store_name is None: + if function_name is None: # Clean all stores for store_dir in self.store_dir.iterdir(): if store_dir.is_dir(): self.clean_stale_data(store_dir.name, max_age) return - store_path = self.store_dir / store_name + store_path = self.store_dir / function_name if not store_path.is_dir(): return diff --git a/src/orcabridge/store/transfer.py b/src/orcabridge/store/transfer.py new file mode 100644 index 0000000..c4757ef --- /dev/null +++ b/src/orcabridge/store/transfer.py @@ -0,0 +1,68 @@ +# Implements transfer data store that lets you transfer memoized packets between data stores. + +from orcabridge.store.types import DataStore +from orcabridge.types import Packet + + +class TransferDataStore(DataStore): + """ + A data store that allows transferring memoized packets between different data stores. + This is useful for moving data between different storage backends. + """ + + def __init__(self, source_store: DataStore, target_store: DataStore) -> None: + self.source_store = source_store + self.target_store = target_store + + def transfer(self, function_name: str, content_hash: str, packet: Packet) -> Packet: + """ + Transfer a memoized packet from the source store to the target store. + """ + retrieved_packet = self.source_store.retrieve_memoized( + function_name, content_hash, packet + ) + if retrieved_packet is None: + raise ValueError("Packet not found in source store.") + + return self.target_store.memoize( + function_name, content_hash, packet, retrieved_packet + ) + + def retrieve_memoized( + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: + """ + Retrieve a memoized packet from the target store. 
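
Reviewer sketch (illustrative, not part of the patch): the intended use of TransferDataStore appears to be migrating memoized results between backends, with reads falling back to the source store and lazily copying hits into the target. Under that assumption, usage could look like this; the store locations, function name, and hash are invented.

from orcabridge.store import DirDataStore
from orcabridge.store.transfer import TransferDataStore

old_store = DirDataStore(store_dir="./pod_data_old")
new_store = DirDataStore(store_dir="./pod_data_new")
bridge = TransferDataStore(source_store=old_store, target_store=new_store)

packet = {"txt_file": "data/day1.txt"}

# Read-through: checks the target first, then the source, copying any source hit
# into the target as a side effect.
result = bridge.retrieve_memoized("count_lines", "abc123", packet)

# Eager push of a single entry from source to target; raises ValueError if the
# entry is missing from the source store.
bridge.transfer("count_lines", "abc123", packet)
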
+ """ + # Try retrieving from the target store first + memoized_packet = self.target_store.retrieve_memoized( + function_name, function_hash, packet + ) + if memoized_packet is not None: + return memoized_packet + + # If not found, try retrieving from the source store + memoized_packet = self.source_store.retrieve_memoized( + function_name, function_hash, packet + ) + if memoized_packet is not None: + # Memoize the packet in the target store as part of the transfer + self.target_store.memoize( + function_name, function_hash, packet, memoized_packet + ) + + return memoized_packet + + def memoize( + self, + function_name: str, + function_hash: str, + packet: Packet, + output_packet: Packet, + ) -> Packet: + """ + Memoize a packet in the target store. + """ + return self.target_store.memoize( + function_name, function_hash, packet, output_packet + ) diff --git a/src/orcabridge/store/types.py b/src/orcabridge/store/types.py new file mode 100644 index 0000000..b32aede --- /dev/null +++ b/src/orcabridge/store/types.py @@ -0,0 +1,24 @@ +from typing import Protocol, runtime_checkable + +from orcabridge.types import Packet + + +@runtime_checkable +class DataStore(Protocol): + """ + Protocol for data stores that can memoize and retrieve packets. + This is used to define the interface for data stores like DirDataStore. + """ + + def __init__(self, *args, **kwargs) -> None: ... + def memoize( + self, + function_name: str, + function_hash: str, + packet: Packet, + output_packet: Packet, + ) -> Packet: ... + + def retrieve_memoized( + self, function_name: str, function_hash: str, packet: Packet + ) -> Packet | None: ... diff --git a/src/orcabridge/stream.py b/src/orcabridge/streams.py similarity index 92% rename from src/orcabridge/stream.py rename to src/orcabridge/streams.py index b2f6c13..03100c7 100644 --- a/src/orcabridge/stream.py +++ b/src/orcabridge/streams.py @@ -1,6 +1,7 @@ -from orcabridge.types import Tag, Packet +from collections.abc import Callable, Collection, Iterator + from orcabridge.base import SyncStream -from collections.abc import Collection, Iterator, Callable +from orcabridge.types import Packet, Tag class SyncStreamFromLists(SyncStream): @@ -11,13 +12,14 @@ def __init__( paired: Collection[tuple[Tag, Packet]] | None = None, tag_keys: list[str] | None = None, packet_keys: list[str] | None = None, + strict: bool = True, **kwargs, ) -> None: super().__init__(**kwargs) self.tag_keys = tag_keys self.packet_keys = packet_keys if tags is not None and packets is not None: - if len(tags) != len(packets): + if strict and len(tags) != len(packets): raise ValueError( "tags and packets must have the same length if both are provided" ) diff --git a/src/orcabridge/tracker.py b/src/orcabridge/tracker.py index 76c7bf7..e8224a2 100644 --- a/src/orcabridge/tracker.py +++ b/src/orcabridge/tracker.py @@ -1,5 +1,5 @@ +from orcabridge.base import Invocation, Operation, Tracker import networkx as nx -from orcabridge.base import Operation, Invocation, Tracker import matplotlib.pyplot as plt diff --git a/src/orcabridge/types.py b/src/orcabridge/types.py index 626023c..51a0284 100644 --- a/src/orcabridge/types.py +++ b/src/orcabridge/types.py @@ -1,13 +1,15 @@ -from typing import Protocol +import os from collections.abc import Collection, Mapping +from typing import Protocol + from typing_extensions import TypeAlias -import os # Convenience alias for anything pathlike PathLike = str | os.PathLike # an (optional) string or a collection of (optional) string values -TagValue: TypeAlias = str | None | 
Collection[str | None] +# Note that TagValue can be nested, allowing for an arbitrary depth of nested lists +TagValue: TypeAlias = str | None | Collection["TagValue"] # the top level tag is a mapping from string keys to values that can be a string or diff --git a/src/orcabridge/utils/stream_utils.py b/src/orcabridge/utils/stream_utils.py index 9edc92c..611e94e 100644 --- a/src/orcabridge/utils/stream_utils.py +++ b/src/orcabridge/utils/stream_utils.py @@ -2,9 +2,10 @@ Utility functions for handling tags """ -from typing import TypeVar from collections.abc import Collection, Mapping -from orcabridge.types import Tag, Packet +from typing import TypeVar + +from orcabridge.types import Packet, Tag K = TypeVar("K") V = TypeVar("V") @@ -50,7 +51,7 @@ def check_packet_compatibility(packet1: Packet, packet2: Packet) -> bool: return True -def batch_tag(all_tags: Collection[Tag]) -> Tag: +def batch_tags(all_tags: Collection[Tag]) -> Tag: """ Batches the tags together. Grouping values under the same key into a list. """ @@ -86,3 +87,13 @@ def batch_packet( raise KeyError(f"Packet {p} does not have key {k}") batch_packet[k].append(p[k]) return batch_packet + + +def fill_missing(dict, keys, default=None): + """ + Fill the missing keys in the dictionary with the specified default value. + """ + for key in keys: + if key not in dict: + dict[key] = default + return dict diff --git a/tests/test_hashing/generate_file_hashes.py b/tests/test_hashing/generate_file_hashes.py index 57a5e4d..a2fe385 100644 --- a/tests/test_hashing/generate_file_hashes.py +++ b/tests/test_hashing/generate_file_hashes.py @@ -11,8 +11,8 @@ import random import string import sys -from pathlib import Path from datetime import datetime +from pathlib import Path # Add the parent directory to the path to import orcabridge sys.path.append(str(Path(__file__).parent.parent.parent)) diff --git a/tests/test_hashing/generate_hash_examples.py b/tests/test_hashing/generate_hash_examples.py index bd266c1..cbba97b 100644 --- a/tests/test_hashing/generate_hash_examples.py +++ b/tests/test_hashing/generate_hash_examples.py @@ -4,9 +4,10 @@ # and revision of the codebase. 
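
Note (illustrative, assumptions flagged inline): with the recursive TagValue alias above, arbitrarily nested tag values become legal, and the new batch_tags and fill_missing helpers operate on plain dicts. A quick sketch of what that permits, with made-up values:

from orcabridge.utils.stream_utils import batch_tags, fill_missing

nested_tag = {"session": ["day1", ["rep1", None]]}        # nested TagValue is now valid

# batch_tags groups values for the same key into a list, roughly:
batched = batch_tags([{"day": "d1"}, {"day": "d2"}])      # -> {"day": ["d1", "d2"]}

# fill_missing adds absent keys with the given default (None here):
filled = fill_missing({"day": "d1"}, ["day", "session"])  # -> {"day": "d1", "session": None}
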
import json -from pathlib import Path from collections import OrderedDict from datetime import datetime +from pathlib import Path + from orcabridge.hashing import hash_to_hex, hash_to_int, hash_to_uuid # Create the hash_samples directory if it doesn't exist diff --git a/tests/test_hashing/generate_pathset_packet_hashes.py b/tests/test_hashing/generate_pathset_packet_hashes.py index 376fb60..6314e66 100644 --- a/tests/test_hashing/generate_pathset_packet_hashes.py +++ b/tests/test_hashing/generate_pathset_packet_hashes.py @@ -13,7 +13,7 @@ # Add the parent directory to the path to import orcabridge sys.path.append(str(Path(__file__).parent.parent.parent)) -from orcabridge.hashing import hash_pathset, hash_packet +from orcabridge.hashing import hash_packet, hash_pathset # Create directories if they don't exist HASH_SAMPLES_DIR = Path(__file__).parent / "hash_samples" diff --git a/tests/test_hashing/test_basic_composite_hasher.py b/tests/test_hashing/test_basic_composite_hasher.py index d8fcc58..798f79d 100644 --- a/tests/test_hashing/test_basic_composite_hasher.py +++ b/tests/test_hashing/test_basic_composite_hasher.py @@ -9,9 +9,10 @@ """ import json -import pytest from pathlib import Path +import pytest + from orcabridge.hashing.file_hashers import HasherFactory diff --git a/tests/test_hashing/test_basic_hashing.py b/tests/test_hashing/test_basic_hashing.py index c0a8f84..5ab355f 100644 --- a/tests/test_hashing/test_basic_hashing.py +++ b/tests/test_hashing/test_basic_hashing.py @@ -1,8 +1,8 @@ from orcabridge.hashing.core import ( + HashableMixin, hash_to_hex, hash_to_int, hash_to_uuid, - HashableMixin, stable_hash, ) diff --git a/tests/test_hashing/test_cached_file_hasher.py b/tests/test_hashing/test_cached_file_hasher.py index f147b2b..e8c3199 100644 --- a/tests/test_hashing/test_cached_file_hasher.py +++ b/tests/test_hashing/test_cached_file_hasher.py @@ -3,12 +3,13 @@ """Tests for CachedFileHasher implementation.""" import json -import pytest -from pathlib import Path -import tempfile import os +import tempfile +from pathlib import Path from unittest.mock import MagicMock +import pytest + from orcabridge.hashing.file_hashers import ( BasicFileHasher, CachedFileHasher, diff --git a/tests/test_hashing/test_composite_hasher.py b/tests/test_hashing/test_composite_hasher.py index 105716d..7ca2c25 100644 --- a/tests/test_hashing/test_composite_hasher.py +++ b/tests/test_hashing/test_composite_hasher.py @@ -2,12 +2,13 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_hashing/test_composite_hasher.py """Tests for the CompositeHasher implementation.""" -import pytest from unittest.mock import patch -from orcabridge.hashing.file_hashers import CompositeHasher, BasicFileHasher -from orcabridge.hashing.types import FileHasher, PathSetHasher, PacketHasher +import pytest + from orcabridge.hashing.core import hash_to_hex +from orcabridge.hashing.file_hashers import BasicFileHasher, CompositeHasher +from orcabridge.hashing.types import FileHasher, PacketHasher, PathSetHasher # Custom implementation of hash_file for tests that doesn't check for file existence @@ -22,9 +23,9 @@ def mock_hash_pathset( pathset, algorithm="sha256", buffer_size=65536, char_count=32, file_hasher=None ): """Mock implementation of hash_pathset that doesn't check for file existence.""" + from collections.abc import Collection from os import PathLike from pathlib import Path - from collections.abc import Collection # If file_hasher is None, we'll need to handle it differently if file_hasher is None: diff --git 
a/tests/test_hashing/test_file_hashes.py b/tests/test_hashing/test_file_hashes.py index 0e3da34..70ff814 100644 --- a/tests/test_hashing/test_file_hashes.py +++ b/tests/test_hashing/test_file_hashes.py @@ -8,9 +8,10 @@ """ import json -import pytest from pathlib import Path +import pytest + # Add the parent directory to the path to import orcabridge from orcabridge.hashing import hash_file diff --git a/tests/test_hashing/test_hash_samples.py b/tests/test_hashing/test_hash_samples.py index 8d4fb10..54fa32f 100644 --- a/tests/test_hashing/test_hash_samples.py +++ b/tests/test_hashing/test_hash_samples.py @@ -6,10 +6,12 @@ the hashing implementation remains stable over time. """ -import os import json -import pytest +import os from pathlib import Path + +import pytest + from orcabridge.hashing import hash_to_hex, hash_to_int, hash_to_uuid diff --git a/tests/test_hashing/test_hasher_factory.py b/tests/test_hashing/test_hasher_factory.py index eb9faf5..81631ab 100644 --- a/tests/test_hashing/test_hasher_factory.py +++ b/tests/test_hashing/test_hasher_factory.py @@ -9,7 +9,7 @@ CachedFileHasher, HasherFactory, ) -from orcabridge.hashing.string_cachers import InMemoryCacher, FileCacher +from orcabridge.hashing.string_cachers import FileCacher, InMemoryCacher class TestHasherFactoryCreateFileHasher: diff --git a/tests/test_hashing/test_hasher_parity.py b/tests/test_hashing/test_hasher_parity.py index 3d0a654..0ec700e 100644 --- a/tests/test_hashing/test_hasher_parity.py +++ b/tests/test_hashing/test_hasher_parity.py @@ -9,12 +9,13 @@ """ import json -import pytest -from pathlib import Path import random +from pathlib import Path + +import pytest +from orcabridge.hashing.core import hash_file, hash_packet, hash_pathset from orcabridge.hashing.file_hashers import HasherFactory -from orcabridge.hashing.core import hash_file, hash_pathset, hash_packet def load_hash_lut(): diff --git a/tests/test_hashing/test_path_set_hasher.py b/tests/test_hashing/test_path_set_hasher.py index 574bddd..ed75b3d 100644 --- a/tests/test_hashing/test_path_set_hasher.py +++ b/tests/test_hashing/test_path_set_hasher.py @@ -2,15 +2,16 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_hashing/test_path_set_hasher.py """Tests for the PathSetHasher protocol implementation.""" -import pytest import os import tempfile from pathlib import Path from unittest.mock import patch +import pytest + +import orcabridge.hashing.core from orcabridge.hashing.file_hashers import DefaultPathsetHasher from orcabridge.hashing.types import FileHasher -import orcabridge.hashing.core class MockFileHasher(FileHasher): @@ -43,9 +44,10 @@ def mock_hash_pathset( pathset, algorithm="sha256", buffer_size=65536, char_count=32, file_hasher=None ): """Mock implementation of hash_pathset that doesn't check for file existence.""" - from orcabridge.hashing.core import hash_to_hex - from os import PathLike from collections.abc import Collection + from os import PathLike + + from orcabridge.hashing.core import hash_to_hex from orcabridge.utils.name import find_noncolliding_name # If file_hasher is None, we'll need to handle it differently diff --git a/tests/test_hashing/test_pathset_and_packet.py b/tests/test_hashing/test_pathset_and_packet.py index 6e0410e..91efbc7 100644 --- a/tests/test_hashing/test_pathset_and_packet.py +++ b/tests/test_hashing/test_pathset_and_packet.py @@ -7,13 +7,14 @@ functions with various input types and configurations. 
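
For context on the API these import-order fixes touch (a sketch only; exact signatures live in orcabridge.hashing, and the paths below are placeholders): hash_file digests a single file, hash_pathset digests a file, directory, or collection of paths, and hash_packet digests a mapping of keys to pathsets.

from orcabridge.hashing import hash_file, hash_packet, hash_pathset

h1 = hash_file("examples/dataset2/day1.json")
h2 = hash_pathset(["examples/dataset2/day1.json", "examples/dataset2/day2.json"])
h3 = hash_packet({"json_file": "examples/dataset2/day1.json"})
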
""" +import logging import os -import pytest import tempfile from pathlib import Path -import logging -from orcabridge.hashing import hash_pathset, hash_packet, hash_file +import pytest + +from orcabridge.hashing import hash_file, hash_packet, hash_pathset logger = logging.getLogger(__name__) diff --git a/tests/test_hashing/test_pathset_packet_hashes.py b/tests/test_hashing/test_pathset_packet_hashes.py index 548cc9a..9f31f00 100644 --- a/tests/test_hashing/test_pathset_packet_hashes.py +++ b/tests/test_hashing/test_pathset_packet_hashes.py @@ -8,11 +8,12 @@ """ import json -import pytest from pathlib import Path +import pytest + # Add the parent directory to the path to import orcabridge -from orcabridge.hashing import hash_pathset, hash_packet +from orcabridge.hashing import hash_packet, hash_pathset def load_pathset_hash_lut(): diff --git a/tests/test_hashing/test_process_structure.py b/tests/test_hashing/test_process_structure.py index e145294..24b3b08 100644 --- a/tests/test_hashing/test_process_structure.py +++ b/tests/test_hashing/test_process_structure.py @@ -1,9 +1,9 @@ -from typing import Any import uuid -from collections import namedtuple, OrderedDict +from collections import OrderedDict, namedtuple from pathlib import Path +from typing import Any -from orcabridge.hashing.core import HashableMixin, process_structure, hash_to_hex +from orcabridge.hashing.core import HashableMixin, hash_to_hex, process_structure # Define a simple HashableMixin class for testing diff --git a/tests/test_hashing/test_sqlite_cacher.py b/tests/test_hashing/test_sqlite_cacher.py index 898a7f3..99a8030 100644 --- a/tests/test_hashing/test_sqlite_cacher.py +++ b/tests/test_hashing/test_sqlite_cacher.py @@ -5,7 +5,8 @@ import threading import time from pathlib import Path -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + from orcabridge.hashing.string_cachers import SQLiteCacher diff --git a/tests/test_hashing/test_string_cacher/test_file_cacher.py b/tests/test_hashing/test_string_cacher/test_file_cacher.py index 20e8057..223fcf8 100644 --- a/tests/test_hashing/test_string_cacher/test_file_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_file_cacher.py @@ -4,7 +4,8 @@ import tempfile import threading from pathlib import Path -from unittest.mock import patch, mock_open +from unittest.mock import mock_open, patch + from orcabridge.hashing.string_cachers import FileCacher diff --git a/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py b/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py index 8dcf7b0..5e76e44 100644 --- a/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_in_memory_cacher.py @@ -2,6 +2,7 @@ import threading import time + from orcabridge.hashing.string_cachers import InMemoryCacher diff --git a/tests/test_hashing/test_string_cacher/test_redis_cacher.py b/tests/test_hashing/test_string_cacher/test_redis_cacher.py index 6477921..ac04b82 100644 --- a/tests/test_hashing/test_string_cacher/test_redis_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_redis_cacher.py @@ -1,7 +1,10 @@ """Tests for RedisCacher using mocked Redis.""" -import pytest +from typing import cast from unittest.mock import patch + +import pytest + from orcabridge.hashing.string_cachers import RedisCacher diff --git a/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py b/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py index bb8eab2..9204543 100644 --- 
a/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py +++ b/tests/test_hashing/test_string_cacher/test_sqlite_cacher.py @@ -5,7 +5,8 @@ import threading import time from pathlib import Path -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + from orcabridge.hashing.string_cachers import SQLiteCacher diff --git a/tests/test_hashing/test_string_cachers.py b/tests/test_hashing/test_string_cachers.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_store/conftest.py b/tests/test_store/conftest.py index 7f157e6..77ca9f9 100644 --- a/tests/test_store/conftest.py +++ b/tests/test_store/conftest.py @@ -2,11 +2,12 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/conftest.py """Common test fixtures for store tests.""" -import pytest -import tempfile import shutil +import tempfile from pathlib import Path +import pytest + @pytest.fixture def temp_dir(): diff --git a/tests/test_store/test_dir_data_store.py b/tests/test_store/test_dir_data_store.py index 7f61b01..37e467c 100644 --- a/tests/test_store/test_dir_data_store.py +++ b/tests/test_store/test_dir_data_store.py @@ -2,18 +2,19 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_dir_data_store.py """Tests for DirDataStore.""" -import pytest import json import shutil from pathlib import Path -from orcabridge.store.dir_data_store import DirDataStore +import pytest + from orcabridge.hashing.types import ( + CompositeFileHasher, FileHasher, - PathSetHasher, PacketHasher, - CompositeFileHasher, + PathSetHasher, ) +from orcabridge.store.core import DirDataStore class MockFileHasher(FileHasher): @@ -447,7 +448,7 @@ def test_dir_data_store_clear_all_stores(temp_dir, sample_files): assert (store_dir / "store2").exists() # Clear all stores with force and non-interactive mode - store.clear_all_stores(interactive=False, store_name=str(store_dir), force=True) + store.clear_all_stores(interactive=False, function_name=str(store_dir), force=True) # Check that the entire store directory was deleted assert not store_dir.exists() diff --git a/tests/test_store/test_integration.py b/tests/test_store/test_integration.py index 9efc8f3..22c67c9 100644 --- a/tests/test_store/test_integration.py +++ b/tests/test_store/test_integration.py @@ -2,17 +2,18 @@ # filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_integration.py """Integration tests for the store module.""" -import pytest import os from pathlib import Path -from orcabridge.store.dir_data_store import DirDataStore, NoOpDataStore +import pytest + from orcabridge.hashing.file_hashers import ( BasicFileHasher, CachedFileHasher, CompositeHasher, ) from orcabridge.hashing.string_cachers import InMemoryCacher +from orcabridge.store.core import DirDataStore, NoOpDataStore def test_integration_with_cached_file_hasher(temp_dir, sample_files): @@ -82,10 +83,10 @@ def test_integration_data_store_chain(temp_dir, sample_files): store2.memoize("test_chain", "content_hash_456", packet2, output_packet2) # Create a function that tries each store in sequence - def retrieve_from_stores(store_name, content_hash, packet): + def retrieve_from_stores(function_name, content_hash, packet): for store in [store1, store2, store3]: try: - result = store.retrieve_memoized(store_name, content_hash, packet) + result = store.retrieve_memoized(function_name, content_hash, packet) if result is not None: return result except FileNotFoundError: @@ -113,11 +114,11 @@ def retrieve_from_stores(store_name, content_hash, packet): # 
without actually trying to hash nonexistent files original_retrieve = store1.retrieve_memoized - def mocked_retrieve(store_name, content_hash, packet): + def mocked_retrieve(function_name, content_hash, packet): # Only return None for our specific test case - if store_name == "test_chain" and content_hash == "content_hash_789": + if function_name == "test_chain" and content_hash == "content_hash_789": return None - return original_retrieve(store_name, content_hash, packet) + return original_retrieve(function_name, content_hash, packet) # Apply the mock to all stores store1.retrieve_memoized = mocked_retrieve diff --git a/tests/test_store/test_noop_data_store.py b/tests/test_store/test_noop_data_store.py index 80ffd24..8f160d1 100644 --- a/tests/test_store/test_noop_data_store.py +++ b/tests/test_store/test_noop_data_store.py @@ -3,7 +3,8 @@ """Tests for NoOpDataStore.""" import pytest -from orcabridge.store.dir_data_store import NoOpDataStore + +from orcabridge.store.core import NoOpDataStore def test_noop_data_store_memoize(): @@ -43,7 +44,7 @@ def test_noop_data_store_retrieve_memoized(): def test_noop_data_store_is_data_store_subclass(): """Test that NoOpDataStore is a subclass of DataStore.""" - from orcabridge.store.dir_data_store import DataStore + from orcabridge.store.core import DataStore store = NoOpDataStore() assert isinstance(store, DataStore) diff --git a/tests/test_store/test_transfer_data_store.py b/tests/test_store/test_transfer_data_store.py new file mode 100644 index 0000000..ddb1d09 --- /dev/null +++ b/tests/test_store/test_transfer_data_store.py @@ -0,0 +1,450 @@ +#!/usr/bin/env python +# filepath: /home/eywalker/workspace/orcabridge/tests/test_store/test_transfer_data_store.py +"""Tests for TransferDataStore.""" + +import json +from pathlib import Path + +import pytest + +from orcabridge.hashing.types import PacketHasher +from orcabridge.store.core import DirDataStore, NoOpDataStore +from orcabridge.store.transfer import TransferDataStore + + +class MockPacketHasher(PacketHasher): + """Mock PacketHasher for testing.""" + + def __init__(self, hash_value="mock_hash"): + self.hash_value = hash_value + self.packet_hash_calls = [] + + def hash_packet(self, packet): + self.packet_hash_calls.append(packet) + return f"{self.hash_value}_packet" + + +def test_transfer_data_store_basic_setup(temp_dir, sample_files): + """Test basic setup of TransferDataStore.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Verify the stores are set correctly + assert transfer_store.source_store is source_store + assert transfer_store.target_store is target_store + + +def test_transfer_data_store_memoize_to_target(temp_dir, sample_files): + """Test that memoize stores packets in the target store.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Memoize through transfer store + result = 
transfer_store.memoize( + "test_store", "content_hash_123", packet, output_packet + ) + + # Verify the packet was stored in target store + assert "output_file" in result + + # Verify we can retrieve it directly from target store + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + assert "output_file" in retrieved_from_target + + # Verify it's NOT in the source store + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is None + + +def test_transfer_data_store_retrieve_from_target_first(temp_dir, sample_files): + """Test that retrieve_memoized checks target store first.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store directly in target store + target_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Retrieve through transfer store should find it in target + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + +def test_transfer_data_store_fallback_to_source_and_copy(temp_dir, sample_files): + """Test that retrieve_memoized falls back to source store and copies to target.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store only in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Verify it's not in target initially + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is None + + # Retrieve through transfer store should find it in source and copy to target + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + # Now verify it was copied to target store + retrieved_from_target_after = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target_after is not None + assert "output_file" in retrieved_from_target_after + + +def test_transfer_data_store_multiple_packets(temp_dir, sample_files): + """Test transfer functionality with multiple packets.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create multiple packets + packets = [ + {"input_file": 
sample_files["input"]["file1"]}, + {"input_file": sample_files["input"]["file2"]}, + ] + + output_packets = [ + {"output_file": sample_files["output"]["output1"]}, + {"output_file": sample_files["output"]["output2"]}, + ] + + content_hashes = ["content_hash_1", "content_hash_2"] + + # Store all packets in source store + for i, (packet, output_packet, content_hash) in enumerate( + zip(packets, output_packets, content_hashes) + ): + source_store.memoize("test_store", content_hash, packet, output_packet) + + # Verify none are in target initially + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is None + + # Retrieve all packets through transfer store + results = [] + for packet, content_hash in zip(packets, content_hashes): + result = transfer_store.retrieve_memoized("test_store", content_hash, packet) + assert result is not None + results.append(result) + + # Verify all packets are now in target store + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is not None + assert "output_file" in retrieved + + +def test_transfer_data_store_explicit_transfer_method(temp_dir, sample_files): + """Test the explicit transfer method.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Use explicit transfer method + result = transfer_store.transfer("test_store", "content_hash_123", packet) + + assert result is not None + assert "output_file" in result + + # Verify it's now in target store + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + +def test_transfer_data_store_transfer_method_not_found(temp_dir, sample_files): + """Test transfer method raises error when packet not found in source.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet + packet = {"input_file": sample_files["input"]["file1"]} + + # Try to transfer packet that doesn't exist + with pytest.raises(ValueError, match="Packet not found in source store"): + transfer_store.transfer("test_store", "nonexistent_hash", packet) + + +def test_transfer_data_store_retrieve_nonexistent_packet(temp_dir, sample_files): + """Test retrieve_memoized returns None for nonexistent packets.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + source_store = DirDataStore(store_dir=source_store_dir) + target_store = DirDataStore(store_dir=target_store_dir) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet 
+ packet = {"input_file": sample_files["input"]["file1"]} + + # Try to retrieve nonexistent packet + result = transfer_store.retrieve_memoized("test_store", "nonexistent_hash", packet) + assert result is None + + +def test_transfer_data_store_different_file_hashers(temp_dir, sample_files): + """Test transfer between stores with different file hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Store in source store + source_store.memoize("test_store", "content_hash_123", packet, output_packet) + + # Verify it's in source store using source hasher + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is not None + + # Transfer through transfer store - this should work despite different hashers + result = transfer_store.retrieve_memoized("test_store", "content_hash_123", packet) + assert result is not None + assert "output_file" in result + + # Verify it's now in target store using target hasher + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + # Verify both hashers were called + assert len(source_hasher.packet_hash_calls) > 0 + assert len(target_hasher.packet_hash_calls) > 0 + + +def test_transfer_data_store_memoize_new_packet_with_different_hashers( + temp_dir, sample_files +): + """Test memoizing new packets when source and target have different hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create packet and output + packet = {"input_file": sample_files["input"]["file1"]} + output_packet = {"output_file": sample_files["output"]["output1"]} + + # Memoize through transfer store (should go to target) + result = transfer_store.memoize( + "test_store", "content_hash_123", packet, output_packet + ) + + assert result is not None + assert "output_file" in result + + # Verify it's only in target store, not source + retrieved_from_target = target_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_target is not None + + retrieved_from_source = source_store.retrieve_memoized( + "test_store", "content_hash_123", packet + ) + assert retrieved_from_source is None + + # Verify target hasher was used for memoization + assert len(target_hasher.packet_hash_calls) > 0 + + +def 
test_transfer_data_store_complex_transfer_scenario(temp_dir, sample_files): + """Test complex scenario with multiple operations and different hashers.""" + source_store_dir = Path(temp_dir) / "source_store" + target_store_dir = Path(temp_dir) / "target_store" + + # Create stores with different hashers + source_hasher = MockPacketHasher(hash_value="source_hash") + target_hasher = MockPacketHasher(hash_value="target_hash") + + source_store = DirDataStore(store_dir=source_store_dir, packet_hasher=source_hasher) + target_store = DirDataStore(store_dir=target_store_dir, packet_hasher=target_hasher) + transfer_store = TransferDataStore( + source_store=source_store, target_store=target_store + ) + + # Create multiple packets + packets = [ + {"input_file": sample_files["input"]["file1"]}, + {"input_file": sample_files["input"]["file2"]}, + ] + + output_packets = [ + {"output_file": sample_files["output"]["output1"]}, + {"output_file": sample_files["output"]["output2"]}, + ] + + content_hashes = ["content_hash_1", "content_hash_2"] + + # 1. Store first packet directly in source + source_store.memoize("test_store", content_hashes[0], packets[0], output_packets[0]) + + # 2. Store second packet through transfer store (should go to target) + transfer_store.memoize( + "test_store", content_hashes[1], packets[1], output_packets[1] + ) + + # 3. Retrieve first packet through transfer store (should copy from source to target) + result1 = transfer_store.retrieve_memoized( + "test_store", content_hashes[0], packets[0] + ) + assert result1 is not None + + # 4. Retrieve second packet through transfer store (should find in target directly) + result2 = transfer_store.retrieve_memoized( + "test_store", content_hashes[1], packets[1] + ) + assert result2 is not None + + # 5. Verify both packets are now in target store + for packet, content_hash in zip(packets, content_hashes): + retrieved = target_store.retrieve_memoized("test_store", content_hash, packet) + assert retrieved is not None + assert "output_file" in retrieved + + # 6. 
Verify first packet is still in source, second is not + retrieved_source_1 = source_store.retrieve_memoized( + "test_store", content_hashes[0], packets[0] + ) + assert retrieved_source_1 is not None + + retrieved_source_2 = source_store.retrieve_memoized( + "test_store", content_hashes[1], packets[1] + ) + assert retrieved_source_2 is None + + +def test_transfer_data_store_with_noop_stores(temp_dir, sample_files): + """Test transfer store behavior with NoOpDataStore.""" + # Test with NoOp as source + noop_source = NoOpDataStore() + target_store_dir = Path(temp_dir) / "target_store" + target_store = DirDataStore(store_dir=target_store_dir) + + transfer_store = TransferDataStore( + source_store=noop_source, target_store=target_store + ) + + packet = {"input": sample_files["input"]["file1"]} + + # Should return None since NoOp store doesn't store anything + result = transfer_store.retrieve_memoized("test_store", "hash123", packet) + assert result is None + + # Test with NoOp as target + source_store_dir = Path(temp_dir) / "source_store" + source_store = DirDataStore(store_dir=source_store_dir) + noop_target = NoOpDataStore() + + transfer_store2 = TransferDataStore( + source_store=source_store, target_store=noop_target + ) + + output_packet = {"output": sample_files["output"]["output1"]} + + # Memoize should work (goes to target which is NoOp) + result = transfer_store2.memoize("test_store", "hash123", packet, output_packet) + assert result == output_packet # NoOp just returns the output packet + + +if __name__ == "__main__": + pytest.main(["-v", __file__]) diff --git a/tests/test_streams_operations/__init__.py b/tests/test_streams_operations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/conftest.py b/tests/test_streams_operations/conftest.py new file mode 100644 index 0000000..b6420a3 --- /dev/null +++ b/tests/test_streams_operations/conftest.py @@ -0,0 +1,204 @@ +""" +Shared fixtures for streams and operations testing. 
+""" + +import tempfile +import json +import numpy as np +from pathlib import Path +from typing import Any, Iterator +import pytest + +from orcabridge.types import Tag, Packet +from orcabridge.streams import SyncStreamFromLists +from orcabridge.store import DirDataStore + + +@pytest.fixture +def temp_dir(): + """Create a temporary directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield Path(tmpdir) + + +@pytest.fixture +def sample_tags(): + """Sample tags for testing.""" + return [ + {"file_name": "day1", "session": "morning"}, + {"file_name": "day2", "session": "afternoon"}, + {"file_name": "day3", "session": "evening"}, + ] + + +@pytest.fixture +def sample_packets(): + """Sample packets for testing.""" + return [ + {"txt_file": "data/day1.txt", "metadata": "meta1.json"}, + {"txt_file": "data/day2.txt", "metadata": "meta2.json"}, + {"txt_file": "data/day3.txt", "metadata": "meta3.json"}, + ] + + +@pytest.fixture +def sample_stream(sample_tags, sample_packets): + """Create a sample stream from tags and packets.""" + return SyncStreamFromLists( + tags=sample_tags, + packets=sample_packets, + tag_keys=["file_name", "session"], + packet_keys=["txt_file", "metadata"], + ) + + +@pytest.fixture +def empty_stream() -> SyncStreamFromLists: + """Create an empty stream.""" + return SyncStreamFromLists(paired=[]) + + +@pytest.fixture +def single_item_stream() -> SyncStreamFromLists: + """Create a stream with a single item.""" + return SyncStreamFromLists(tags=[{"name": "single"}], packets=[{"data": "value"}]) + + +@pytest.fixture +def test_files(temp_dir) -> dict[str, Any]: + """Create test files for source testing.""" + # Create text files + txt_dir = temp_dir / "txt_files" + txt_dir.mkdir() + + txt_files = [] + for i, day in enumerate(["day1", "day2", "day3"], 1): + txt_file = txt_dir / f"{day}.txt" + txt_file.write_text(f"Content for {day}\n" * (i * 5)) + txt_files.append(txt_file) + + # Create binary files with numpy arrays + bin_dir = temp_dir / "bin_files" + bin_dir.mkdir() + + bin_files = [] + for i, session in enumerate(["session_day1", "session_day2"], 1): + bin_file = bin_dir / f"{session}.bin" + data = np.random.rand(10 * i).astype(np.float64) + bin_file.write_bytes(data.tobytes()) + bin_files.append(bin_file) + + # Create json files + json_dir = temp_dir / "json_files" + json_dir.mkdir() + + json_files = [] + for i, info in enumerate(["info_day1", "info_day2"], 1): + json_file = json_dir / f"{info}.json" + data = {"lines": i * 5, "day": f"day{i}", "processed": False} + json_file.write_text(json.dumps(data)) + json_files.append(json_file) + + return { + "txt_dir": txt_dir, + "txt_files": txt_files, + "bin_dir": bin_dir, + "bin_files": bin_files, + "json_dir": json_dir, + "json_files": json_files, + } + + +@pytest.fixture +def data_store(temp_dir) -> DirDataStore: + """Create a test data store.""" + store_dir = temp_dir / "data_store" + return DirDataStore(store_dir=store_dir) + + +# Sample functions for FunctionPod testing + + +def sample_function_no_output(input_file: str) -> None: + """Sample function that takes input but returns nothing.""" + pass + + +def sample_function_single_output(input_file: str) -> str: + """Sample function that returns a single output.""" + return str(Path(input_file).with_suffix(".processed")) + + +def sample_function_multiple_outputs(input_file: str) -> tuple[str, str]: + """Sample function that returns multiple outputs.""" + base = Path(input_file).stem + return f"{base}_output1.txt", f"{base}_output2.txt" + + +def 
sample_function_with_error(input_file: str) -> str: + """Sample function that raises an error.""" + raise ValueError("Intentional error for testing") + + +def count_lines_function(txt_file: str) -> int: + """Function that counts lines in a text file.""" + with open(txt_file, "r") as f: + return len(f.readlines()) + + +def compute_stats_function(bin_file: str, temp_dir: str | None = None) -> str: + """Function that computes statistics on binary data.""" + import tempfile + + with open(bin_file, "rb") as f: + data = np.frombuffer(f.read(), dtype=np.float64) + + stats = { + "mean": float(np.mean(data)), + "std": float(np.std(data)), + "min": float(np.min(data)), + "max": float(np.max(data)), + "count": len(data), + } + + if temp_dir is None: + output_file = Path(tempfile.mkdtemp()) / "stats.json" + else: + output_file = Path(temp_dir) / "stats.json" + + with open(output_file, "w") as f: + json.dump(stats, f) + + return str(output_file) + + +# Predicate functions for Filter testing + + +def filter_by_session_morning(tag: Tag, packet: Packet) -> bool: + """Filter predicate that keeps only morning sessions.""" + return tag.get("session") == "morning" + + +def filter_by_filename_pattern(tag: Tag, packet: Packet) -> bool: + """Filter predicate that keeps files matching a pattern.""" + return "day1" in tag.get("file_name", "") # type: ignore + + +# Transform functions + + +def transform_add_prefix(tag: Tag, packet: Packet) -> tuple[Tag, Packet]: + """Transform that adds prefix to file_name tag.""" + new_tag = tag.copy() + if "file_name" in new_tag: + new_tag["file_name"] = f"prefix_{new_tag['file_name']}" + return new_tag, packet + + +def transform_rename_keys(tag: Tag, packet: Packet) -> tuple[Tag, Packet]: + """Transform that renames packet keys.""" + new_packet = packet.copy() + if "txt_file" in new_packet: + new_packet["content"] = new_packet.pop("txt_file") + return tag, new_packet diff --git a/tests/test_streams_operations/test_mappers/__init__.py b/tests/test_streams_operations/test_mappers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_mappers/test_batch.py b/tests/test_streams_operations/test_mappers/test_batch.py new file mode 100644 index 0000000..b30701e --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_batch.py @@ -0,0 +1,290 @@ +"""Tests for Batch mapper functionality.""" + +import pytest +from orcabridge.mappers import Batch +from orcabridge.streams import SyncStreamFromLists + + +class TestBatch: + """Test cases for Batch mapper.""" + + def test_batch_basic(self, sample_tags, sample_packets): + """Test basic batch functionality.""" + stream = SyncStreamFromLists(sample_tags, sample_packets) + batch = Batch(2, drop_last=False) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 2 batches: [packet1, packet2] and [packet3] + assert len(result) == 2 + + batch1_tag, batch1_packet = result[0] + batch2_tag, batch2_packet = result[1] + + # First batch should have 2 items + assert len(batch1_packet["txt_file"]) == 2 + for k, v in batch1_packet.items(): + assert v == [p[k] for p in sample_packets[:2]] + + assert len(batch2_packet["txt_file"]) == 1 + for k, v in batch2_packet.items(): + assert v == [p[k] for p in sample_packets[2:]] + + def test_batch_exact_division(self): + """Test batch when stream length divides evenly by batch size.""" + packets = [1, 2, 3, 4, 5, 6] + tags = ["a", "b", "c", "d", "e", "f"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(3) + 
batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 2 batches + assert len(result) == 2 + + batch1_packet, _ = result[0] + batch2_packet, _ = result[1] + + assert len(batch1_packet) == 3 + assert len(batch2_packet) == 3 + assert list(batch1_packet) == [1, 2, 3] + assert list(batch2_packet) == [4, 5, 6] + + def test_batch_size_one(self, sample_packets, sample_tags): + """Test batch with size 1.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(1) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have same number of batches as original packets + assert len(result) == len(sample_packets) + + for i, (batch_packet, batch_tag) in enumerate(result): + assert len(batch_packet) == 1 + assert list(batch_packet) == [sample_packets[i]] + + def test_batch_larger_than_stream(self, sample_packets, sample_tags): + """Test batch size larger than stream.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(10) # Larger than sample_packets length + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 1 batch with all packets + assert len(result) == 1 + + batch_packet, batch_tag = result[0] + assert len(batch_packet) == len(sample_packets) + assert list(batch_packet) == sample_packets + + def test_batch_empty_stream(self): + """Test batch with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + batch = Batch(3) + batched_stream = batch(empty_stream) + + result = list(batched_stream) + assert len(result) == 0 + + def test_batch_preserves_packet_types(self): + """Test that batch preserves different packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 3 batches: [2, 2, 1] + assert len(result) == 3 + + # Check first batch + batch1_packet, _ = result[0] + batch1_list = list(batch1_packet) + assert batch1_list[0] == PacketType("data1") + assert batch1_list[1] == {"key": "value"} + + # Check second batch + batch2_packet, _ = result[1] + batch2_list = list(batch2_packet) + assert batch2_list[0] == [1, 2, 3] + assert batch2_list[1] == 42 + + # Check third batch + batch3_packet, _ = result[2] + batch3_list = list(batch3_packet) + assert batch3_list[0] == "string" + + def test_batch_tag_handling(self, sample_packets, sample_tags): + """Test how batch handles tags.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Each batch should have some representation of the constituent tags + for batch_packet, batch_tag in result: + assert batch_tag is not None + # The exact format depends on implementation + + def test_batch_maintains_order(self): + """Test that batch maintains packet order within batches.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(3) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 4 batches: [3, 3, 3, 1] + assert len(result) == 4 + + # Check order within each batch + all_packets = [] + for batch_packet, _ in result: + all_packets.extend(list(batch_packet)) + + assert all_packets == packets + + def test_batch_large_stream(self): + """Test batch with 
large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(50) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have exactly 20 batches of 50 each + assert len(result) == 20 + + for i, (batch_packet, _) in enumerate(result): + assert len(batch_packet) == 50 + expected_packets = packets[i * 50 : (i + 1) * 50] + assert list(batch_packet) == expected_packets + + def test_batch_invalid_size(self): + """Test batch with invalid size.""" + with pytest.raises(ValueError): + Batch(0) + + with pytest.raises(ValueError): + Batch(-1) + + with pytest.raises(TypeError): + Batch(3.5) + + with pytest.raises(TypeError): + Batch("3") + + def test_batch_chaining(self, sample_packets, sample_tags): + """Test chaining batch operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First batch: size 2 + batch1 = Batch(2) + stream1 = batch1(stream) + + # Second batch: size 1 (batch the batches) + batch2 = Batch(1) + stream2 = batch2(stream1) + + result = list(stream2) + + # Each item should be a batch containing a single batch + for batch_packet, _ in result: + assert len(batch_packet) == 1 + # The contained item should itself be a batch + + def test_batch_with_generator_stream(self): + """Test batch with generator-based stream.""" + + def packet_generator(): + for i in range(7): + yield f"packet_{i}", f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + batch = Batch(3) + batched_stream = batch(stream) + + result = list(batched_stream) + + # Should have 3 batches: [3, 3, 1] + assert len(result) == 3 + + batch1_packet, _ = result[0] + batch2_packet, _ = result[1] + batch3_packet, _ = result[2] + + assert len(batch1_packet) == 3 + assert len(batch2_packet) == 3 + assert len(batch3_packet) == 1 + + def test_batch_memory_efficiency(self): + """Test that batch doesn't consume excessive memory.""" + # Create a large stream + packets = [f"packet_{i}" for i in range(10000)] + tags = [f"tag_{i}" for i in range(10000)] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(100) + batched_stream = batch(stream) + + # Process one batch at a time to test memory efficiency + batch_count = 0 + for batch_packet, _ in batched_stream: + batch_count += 1 + assert len(batch_packet) <= 100 + if batch_count == 50: # Stop early to avoid processing everything + break + + assert batch_count == 50 + + def test_batch_with_none_packets(self): + """Test batch with None packets.""" + packets = [1, None, 3, None, 5, None] + tags = ["num1", "null1", "num3", "null2", "num5", "null3"] + + stream = SyncStreamFromLists(packets, tags) + batch = Batch(2) + batched_stream = batch(stream) + + result = list(batched_stream) + + assert len(result) == 3 + + # Check that None values are preserved + all_packets = [] + for batch_packet, _ in result: + all_packets.extend(list(batch_packet)) + + assert all_packets == packets + + def test_batch_pickle(self): + """Test that Batch mapper is pickleable.""" + import pickle + from orcabridge.mappers import Batch + + batch = Batch(batch_size=3) + pickled = pickle.dumps(batch) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Batch) + assert unpickled.batch_size == batch.batch_size diff --git a/tests/test_streams_operations/test_mappers/test_cache_stream.py 
b/tests/test_streams_operations/test_mappers/test_cache_stream.py new file mode 100644 index 0000000..feefb61 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_cache_stream.py @@ -0,0 +1,299 @@ +""" +Test module for CacheStream mapper. + +This module tests the CacheStream mapper functionality, which provides +caching capabilities to avoid upstream recomputation by storing stream data +in memory after the first iteration. +""" + +import pytest +from unittest.mock import Mock + +from orcabridge.base import SyncStream +from orcabridge.mapper import CacheStream +from orcabridge.stream import SyncStreamFromLists + + +@pytest.fixture +def cache_mapper(): + """Create a CacheStream mapper instance.""" + return CacheStream() + + +@pytest.fixture +def sample_stream_data(): + """Sample stream data for testing.""" + return [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + +@pytest.fixture +def sample_stream(sample_stream_data): + """Create a sample stream.""" + tags, packets = zip(*sample_stream_data) + return SyncStreamFromLists(list(tags), list(packets)) + + +class TestCacheStream: + """Test cases for CacheStream mapper.""" + + def test_cache_initialization(self, cache_mapper): + """Test that CacheStream initializes with empty cache.""" + assert cache_mapper.cache == [] + assert cache_mapper.is_cached is False + + def test_repr(self, cache_mapper): + """Test CacheStream string representation.""" + assert repr(cache_mapper) == "CacheStream(active:False)" + + # After caching + cache_mapper.is_cached = True + assert repr(cache_mapper) == "CacheStream(active:True)" + + def test_first_iteration_caches_data(self, cache_mapper, sample_stream): + """Test that first iteration through stream caches the data.""" + cached_stream = cache_mapper(sample_stream) + + # Initially not cached + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + # Iterate through stream + result = list(cached_stream) + + # After iteration, should be cached + assert cache_mapper.is_cached + assert len(cache_mapper.cache) == 3 + assert cache_mapper.cache == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + # Result should match original stream + assert result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_subsequent_iterations_use_cache(self, cache_mapper, sample_stream): + """Test that subsequent iterations use cached data.""" + cached_stream = cache_mapper(sample_stream) + + # First iteration + first_result = list(cached_stream) + assert cache_mapper.is_cached + + # Create new stream from same mapper (simulates reuse) + second_cached_stream = cache_mapper() # No input streams for cached version + second_result = list(second_cached_stream) + + # Results should be identical + assert first_result == second_result + assert second_result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_clear_cache(self, cache_mapper, sample_stream): + """Test cache clearing functionality.""" + cached_stream = cache_mapper(sample_stream) + + # Cache some data + list(cached_stream) + assert cache_mapper.is_cached + assert len(cache_mapper.cache) > 0 + + # Clear cache + cache_mapper.clear_cache() + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + def test_multiple_streams_error_when_not_cached(self, cache_mapper, sample_stream): + """Test that providing multiple 
streams raises error when not cached.""" + stream2 = SyncStreamFromLists([{"id": 4}], [{"value": 40}]) + + with pytest.raises( + ValueError, match="CacheStream operation requires exactly one stream" + ): + cache_mapper(sample_stream, stream2) + + def test_no_streams_when_cached(self, cache_mapper, sample_stream): + """Test that cached stream can be called without input streams.""" + # First, cache some data + cached_stream = cache_mapper(sample_stream) + list(cached_stream) # This caches the data + + # Now call without streams (should use cache) + cached_only_stream = cache_mapper() + result = list(cached_only_stream) + + assert result == [ + ({"id": 1}, {"value": 10}), + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + def test_empty_stream_caching(self, cache_mapper): + """Test caching behavior with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + cached_stream = cache_mapper(empty_stream) + + result = list(cached_stream) + + assert result == [] + assert cache_mapper.is_cached + assert cache_mapper.cache == [] + + def test_identity_structure(self, cache_mapper, sample_stream): + """Test that CacheStream has unique identity structure.""" + # CacheStream should return None for identity structure + # to treat every instance as different + assert cache_mapper.identity_structure(sample_stream) is None + + def test_avoids_upstream_recomputation(self, cache_mapper): + """Test that CacheStream avoids upstream recomputation.""" + # Create a mock stream that tracks how many times it's iterated + iteration_count = {"count": 0} + + def counting_generator(): + iteration_count["count"] += 1 + yield ({"id": 1}, {"value": 10}) + yield ({"id": 2}, {"value": 20}) + + mock_stream = Mock(spec=SyncStream) + mock_stream.__iter__ = counting_generator + + cached_stream = cache_mapper(mock_stream) + + # First iteration should call upstream + list(cached_stream) + assert iteration_count["count"] == 1 + + # Second iteration should use cache (not call upstream) + second_cached_stream = cache_mapper() + list(second_cached_stream) + assert iteration_count["count"] == 1 # Should still be 1 + + def test_cache_with_different_data_types(self, cache_mapper): + """Test caching with various data types.""" + complex_data = [ + ({"id": 1, "type": "string"}, {"data": "hello", "numbers": [1, 2, 3]}), + ({"id": 2, "type": "dict"}, {"data": {"nested": True}, "numbers": None}), + ({"id": 3, "type": "boolean"}, {"data": True, "numbers": 42}), + ] + + tags, packets = zip(*complex_data) + stream = SyncStreamFromLists(list(tags), list(packets)) + cached_stream = cache_mapper(stream) + + result = list(cached_stream) + + assert result == complex_data + assert cache_mapper.is_cached + assert cache_mapper.cache == complex_data + + def test_multiple_cache_instances(self, sample_stream): + """Test that different CacheStream instances have separate caches.""" + cache1 = CacheStream() + cache2 = CacheStream() + + # Cache in first instance + cached_stream1 = cache1(sample_stream) + list(cached_stream1) + + # Second instance should not be cached + assert cache1.is_cached + assert not cache2.is_cached + assert len(cache1.cache) == 3 + assert len(cache2.cache) == 0 + + def test_keys_method(self, cache_mapper, sample_stream): + """Test that CacheStream passes through keys correctly.""" + # CacheStream should inherit keys from input stream + tag_keys, packet_keys = cache_mapper.keys(sample_stream) + original_tag_keys, original_packet_keys = sample_stream.keys() + + assert tag_keys == original_tag_keys + assert 
packet_keys == original_packet_keys + + def test_chaining_with_cache(self, cache_mapper, sample_stream): + """Test chaining CacheStream with other operations.""" + from orcabridge.mapper import Filter + + # Chain cache with filter + filter_mapper = Filter(lambda tag, packet: tag["id"] > 1) + + # Cache first, then filter + cached_stream = cache_mapper(sample_stream) + filtered_stream = filter_mapper(cached_stream) + + result = list(filtered_stream) + + assert len(result) == 2 # Should have filtered out id=1 + assert result == [ + ({"id": 2}, {"value": 20}), + ({"id": 3}, {"value": 30}), + ] + + # Cache should still be populated with original data + assert cache_mapper.is_cached + assert len(cache_mapper.cache) == 3 + + def test_cache_persistence_across_multiple_outputs( + self, cache_mapper, sample_stream + ): + """Test that cache persists when creating multiple output streams.""" + # First stream + stream1 = cache_mapper(sample_stream) + result1 = list(stream1) + + # Second stream from same cache + stream2 = cache_mapper() + result2 = list(stream2) + + # Third stream from same cache + stream3 = cache_mapper() + result3 = list(stream3) + + # All results should be identical + assert result1 == result2 == result3 + assert len(result1) == 3 + + def test_error_handling_during_caching(self, cache_mapper): + """Test error handling when upstream stream raises exception.""" + + def error_generator(): + yield ({"id": 1}, {"value": 10}) + raise ValueError("Upstream error") + + mock_stream = Mock(spec=SyncStream) + mock_stream.__iter__ = error_generator + + cached_stream = cache_mapper(mock_stream) + + # Should propagate the error and not cache partial data + with pytest.raises(ValueError, match="Upstream error"): + list(cached_stream) + + # Cache should remain empty after error + assert not cache_mapper.is_cached + assert len(cache_mapper.cache) == 0 + + def test_cache_stream_pickle(self): + """Test that CacheStream mapper is pickleable.""" + import pickle + from orcabridge.mappers import CacheStream + + cache_stream = CacheStream() + pickled = pickle.dumps(cache_stream) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, CacheStream) + assert unpickled.__class__.__name__ == "CacheStream" diff --git a/tests/test_streams_operations/test_mappers/test_default_tag.py b/tests/test_streams_operations/test_mappers/test_default_tag.py new file mode 100644 index 0000000..281002b --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_default_tag.py @@ -0,0 +1,260 @@ +"""Tests for DefaultTag mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import DefaultTag +from orcabridge.stream import SyncStreamFromLists + + +class TestDefaultTag: + """Test cases for DefaultTag mapper.""" + + def test_default_tag_basic(self, sample_packets): + """Test basic default tag functionality.""" + tags = ["existing1", None, "existing2"] + + stream = SyncStreamFromLists(sample_packets, tags) + default_tag = DefaultTag("default_value") + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = ["existing1", "default_value", "existing2"] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_default_tag_all_none(self, sample_packets): + """Test default tag when all tags are None.""" + tags = [None, None, None] + + stream = 
SyncStreamFromLists(sample_packets, tags) + default_tag = DefaultTag("fallback") + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = ["fallback", "fallback", "fallback"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_default_tag_no_none(self, sample_packets, sample_tags): + """Test default tag when no tags are None.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + default_tag = DefaultTag("unused_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + # Should remain unchanged + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_default_tag_empty_stream(self): + """Test default tag with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + default_tag = DefaultTag("default") + result_stream = default_tag(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_default_tag_different_types(self): + """Test default tag with different default value types.""" + packets = ["data1", "data2", "data3"] + tags = [None, "existing", None] + + # Test with string default + stream1 = SyncStreamFromLists(packets, tags) + default_tag1 = DefaultTag("string_default") + result1 = list(default_tag1(stream1)) + + expected_tags1 = ["string_default", "existing", "string_default"] + actual_tags1 = [tag for _, tag in result1] + assert actual_tags1 == expected_tags1 + + # Test with numeric default + stream2 = SyncStreamFromLists(packets, tags) + default_tag2 = DefaultTag(42) + result2 = list(default_tag2(stream2)) + + expected_tags2 = [42, "existing", 42] + actual_tags2 = [tag for _, tag in result2] + assert actual_tags2 == expected_tags2 + + def test_default_tag_empty_string_vs_none(self): + """Test default tag distinguishes between empty string and None.""" + packets = ["data1", "data2", "data3"] + tags = [None, "", None] # Empty string vs None + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("default") + result_stream = default_tag(stream) + + result = list(result_stream) + + # Empty string should be preserved, None should be replaced + expected_tags = ["default", "", "default"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_default_tag_preserves_packets(self): + """Test that default tag preserves all packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = [None, None, "existing", None, None] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + expected_tags = ["default", "default", "existing", "default", "default"] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_default_tag_with_complex_default(self): + """Test default tag with complex default value.""" + packets = ["data1", "data2"] + tags = [None, "existing"] + + default_value = {"type": "default", "timestamp": 12345} + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag(default_value) + result_stream = default_tag(stream) + + result = list(result_stream) + + expected_tags = [default_value, "existing"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == 
expected_tags + assert actual_tags[0] is default_value # Should be the same object + + def test_default_tag_chaining(self, sample_packets): + """Test chaining multiple default tag operations.""" + tags = [None, "middle", None] + + stream = SyncStreamFromLists(sample_packets, tags) + + # First default tag + default_tag1 = DefaultTag("first_default") + stream1 = default_tag1(stream) + + # Create new stream with some None tags again + intermediate_result = list(stream1) + new_tags = [ + None if tag == "first_default" else tag for _, tag in intermediate_result + ] + new_packets = [packet for packet, _ in intermediate_result] + + stream2 = SyncStreamFromLists(new_packets, new_tags) + default_tag2 = DefaultTag("second_default") + stream3 = default_tag2(stream2) + + final_result = list(stream3) + + # The "middle" tag should be preserved + actual_tags = [tag for _, tag in final_result] + assert "middle" in actual_tags + assert "second_default" in actual_tags + + def test_default_tag_maintains_order(self): + """Test that default tag maintains packet order.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [None if i % 2 == 0 else f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("even_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + + # Check that even indices got default tags, odd indices kept original + for i in range(10): + if i % 2 == 0: + assert actual_tags[i] == "even_default" + else: + assert actual_tags[i] == f"tag_{i}" + + def test_default_tag_with_callable_default(self): + """Test default tag with callable default (if supported).""" + packets = ["data1", "data2", "data3"] + tags = [None, "existing", None] + + # Simple callable that returns a counter + class DefaultGenerator: + def __init__(self): + self.count = 0 + + def __call__(self): + self.count += 1 + return f"default_{self.count}" + + # If the implementation supports callable defaults + try: + default_gen = DefaultGenerator() + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag(default_gen) + result_stream = default_tag(stream) + + result = list(result_stream) + actual_tags = [tag for _, tag in result] + + # This would only work if DefaultTag supports callable defaults + # Otherwise this test should be skipped or modified + assert "existing" in actual_tags + except (TypeError, AttributeError): + # If callable defaults are not supported, that's fine + pass + + def test_default_tag_large_stream(self): + """Test default tag with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [None if i % 3 == 0 else f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + default_tag = DefaultTag("bulk_default") + result_stream = default_tag(stream) + + result = list(result_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 1000 + assert len(actual_tags) == 1000 + + # Check that every third tag was replaced + for i in range(1000): + if i % 3 == 0: + assert actual_tags[i] == "bulk_default" + else: + assert actual_tags[i] == f"tag_{i}" + + def test_default_tag_pickle(self): + """Test that DefaultTag mapper is pickleable.""" + import pickle + from orcabridge.mappers import DefaultTag + + default_tag = DefaultTag({"default": "test"}) + pickled = pickle.dumps(default_tag) +
unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, DefaultTag) + assert unpickled.default_tag == default_tag.default_tag diff --git a/tests/test_streams_operations/test_mappers/test_filter.py b/tests/test_streams_operations/test_mappers/test_filter.py new file mode 100644 index 0000000..b16049d --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_filter.py @@ -0,0 +1,325 @@ +"""Tests for Filter mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import Filter +from orcabridge.stream import SyncStreamFromLists + + +class TestFilter: + """Test cases for Filter mapper.""" + + def test_filter_basic(self, simple_predicate): + """Test basic filter functionality.""" + packets = [1, 2, 3, 4, 5, 6] + tags = ["odd", "even", "odd", "even", "odd", "even"] + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(simple_predicate) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Should keep only even numbers + expected_packets = [2, 4, 6] + expected_tags = ["even", "even", "even"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_none_match(self, sample_packets, sample_tags): + """Test filter when no packets match.""" + + def never_matches(packet, tag): + return False + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(never_matches) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + assert len(result) == 0 + + def test_filter_all_match(self, sample_packets, sample_tags): + """Test filter when all packets match.""" + + def always_matches(packet, tag): + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(always_matches) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_filter_empty_stream(self, simple_predicate): + """Test filter with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + filter_mapper = Filter(simple_predicate) + filtered_stream = filter_mapper(empty_stream) + + result = list(filtered_stream) + assert len(result) == 0 + + def test_filter_string_predicate(self): + """Test filter with string-based predicate.""" + packets = ["apple", "banana", "cherry", "date", "elderberry"] + tags = ["fruit1", "fruit2", "fruit3", "fruit4", "fruit5"] + + def starts_with_vowel(packet, tag): + return isinstance(packet, str) and packet[0].lower() in "aeiou" + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(starts_with_vowel) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = ["apple", "elderberry"] + expected_tags = ["fruit1", "fruit5"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_tag_based_predicate(self): + """Test filter using tag information.""" + packets = [10, 20, 30, 40, 50] + tags = ["small", "medium", "large", "huge", "enormous"] + + def tag_length_filter(packet, tag): + return len(tag) <= 5 + + stream = 
SyncStreamFromLists(packets, tags) + filter_mapper = Filter(tag_length_filter) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [10, 40] # "small" and "huge" have <= 5 chars + expected_tags = ["small", "huge"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_complex_predicate(self): + """Test filter with complex predicate.""" + packets = [ + {"value": 5, "type": "A", "active": True}, + {"value": 15, "type": "B", "active": False}, + {"value": 25, "type": "A", "active": True}, + {"value": 35, "type": "C", "active": True}, + {"value": 45, "type": "A", "active": False}, + ] + tags = ["item1", "item2", "item3", "item4", "item5"] + + def complex_predicate(packet, tag): + return ( + isinstance(packet, dict) + and packet.get("type") == "A" + and packet.get("active", False) + and packet.get("value", 0) > 10 + ) + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(complex_predicate) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Only the third item matches all conditions + expected_packets = [{"value": 25, "type": "A", "active": True}] + expected_tags = ["item3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_with_none_packets(self): + """Test filter with None packets.""" + packets = [None, "data", None, "more_data", None] + tags = ["empty1", "full1", "empty2", "full2", "empty3"] + + def not_none(packet, tag): + return packet is not None + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(not_none) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = ["data", "more_data"] + expected_tags = ["full1", "full2"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_preserves_packet_types(self): + """Test that filter preserves packet types.""" + packets = [PacketType("data1"), [1, 2, 3], {"key": "value"}, "string", 42] + tags = ["type1", "type2", "type3", "type4", "type5"] + + def is_container(packet, tag): + return isinstance(packet, (list, dict)) + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(is_container) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [[1, 2, 3], {"key": "value"}] + expected_tags = ["type2", "type3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + assert isinstance(actual_packets[0], list) + assert isinstance(actual_packets[1], dict) + + def test_filter_maintains_order(self): + """Test that filter maintains packet order.""" + packets = [f"packet_{i}" for i in range(20)] + tags = [f"tag_{i}" for i in range(20)] + + def keep_even_indices(packet, tag): + # Extract index from packet name + index = int(packet.split("_")[1]) + return index % 2 == 0 + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(keep_even_indices) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [f"packet_{i}" for i in 
range(0, 20, 2)] + expected_tags = [f"tag_{i}" for i in range(0, 20, 2)] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_filter_predicate_exception(self, sample_packets, sample_tags): + """Test filter when predicate raises exception.""" + + def error_predicate(packet, tag): + if packet == sample_packets[1]: # Error on second packet + raise ValueError("Predicate error") + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + filter_mapper = Filter(error_predicate) + filtered_stream = filter_mapper(stream) + + # Should propagate the exception + with pytest.raises(ValueError): + list(filtered_stream) + + def test_filter_with_lambda(self): + """Test filter with lambda predicate.""" + packets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + tags = [f"num_{i}" for i in packets] + + stream = SyncStreamFromLists(packets, tags) + filter_mapper = Filter(lambda p, t: p % 3 == 0) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + expected_packets = [3, 6, 9] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_chaining(self): + """Test chaining multiple filter operations.""" + packets = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + tags = [f"num_{i}" for i in packets] + + stream = SyncStreamFromLists(packets, tags) + + # First filter: keep even numbers + filter1 = Filter(lambda p, t: p % 2 == 0) + stream1 = filter1(stream) + + # Second filter: keep numbers > 4 + filter2 = Filter(lambda p, t: p > 4) + stream2 = filter2(stream1) + + result = list(stream2) + + expected_packets = [6, 8, 10] # Even numbers > 4 + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_with_generator_stream(self): + """Test filter with generator-based stream.""" + + def packet_generator(): + for i in range(20): + yield i, f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + def is_prime(packet, tag): + if packet < 2: + return False + for i in range(2, int(packet**0.5) + 1): + if packet % i == 0: + return False + return True + + filter_mapper = Filter(is_prime) + filtered_stream = filter_mapper(stream) + + result = list(filtered_stream) + + # Prime numbers under 20: 2, 3, 5, 7, 11, 13, 17, 19 + expected_packets = [2, 3, 5, 7, 11, 13, 17, 19] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_filter_pickle(self): + """Test that Filter mapper is pickleable.""" + import pickle + from orcabridge.mappers import Filter + + def is_even(tag, packet): + return packet % 2 == 0 + + filter_mapper = Filter(is_even) + pickled = pickle.dumps(filter_mapper) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Filter) + assert unpickled.__class__.__name__ == "Filter" diff --git a/tests/test_streams_operations/test_mappers/test_first_match.py b/tests/test_streams_operations/test_mappers/test_first_match.py new file mode 100644 index 0000000..b282ebc --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_first_match.py @@ -0,0 +1,244 @@ +"""Tests for FirstMatch mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import FirstMatch +from orcabridge.stream import SyncStreamFromLists + + +class 
TestFirstMatch: + """Test cases for FirstMatch mapper.""" + + def test_first_match_basic(self, simple_predicate): + """Test basic first match functionality.""" + packets = [1, 2, 3, 4, 5] + tags = ["odd", "even", "odd", "even", "odd"] + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(simple_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should find the first packet that matches the predicate + assert len(result) == 1 + packet, tag = result[0] + assert packet == 2 # First even number + assert tag == "even" + + def test_first_match_no_match(self, sample_packets, sample_tags): + """Test first match when no packet matches.""" + + def never_matches(packet, tag): + return False + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(never_matches) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_first_match_all_match(self, sample_packets, sample_tags): + """Test first match when all packets match.""" + + def always_matches(packet, tag): + return True + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(always_matches) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should return only the first packet + assert len(result) == 1 + packet, tag = result[0] + assert packet == sample_packets[0] + assert tag == sample_tags[0] + + def test_first_match_empty_stream(self, simple_predicate): + """Test first match with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + first_match = FirstMatch(simple_predicate) + result_stream = first_match(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_first_match_string_predicate(self): + """Test first match with string-based predicate.""" + packets = ["apple", "banana", "cherry", "date"] + tags = ["fruit1", "fruit2", "fruit3", "fruit4"] + + def starts_with_c(packet, tag): + return isinstance(packet, str) and packet.startswith("c") + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(starts_with_c) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "cherry" + assert tag == "fruit3" + + def test_first_match_tag_based_predicate(self): + """Test first match using tag information.""" + packets = [10, 20, 30, 40] + tags = ["small", "medium", "large", "huge"] + + def tag_contains_e(packet, tag): + return "e" in tag + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(tag_contains_e) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == 20 # "medium" contains 'e' + assert tag == "medium" + + def test_first_match_complex_predicate(self): + """Test first match with complex predicate.""" + packets = [ + {"value": 5, "type": "A"}, + {"value": 15, "type": "B"}, + {"value": 25, "type": "A"}, + {"value": 35, "type": "C"}, + ] + tags = ["item1", "item2", "item3", "item4"] + + def complex_predicate(packet, tag): + return ( + isinstance(packet, dict) + and packet.get("value", 0) > 10 + and packet.get("type") == "A" + ) + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(complex_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == {"value": 25, "type": "A"} + assert tag == 
"item3" + + def test_first_match_with_none_packets(self): + """Test first match with None packets.""" + packets = [None, "data", None, "more_data"] + tags = ["empty1", "full1", "empty2", "full2"] + + def not_none(packet, tag): + return packet is not None + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(not_none) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "data" + assert tag == "full1" + + def test_first_match_preserves_packet_types(self): + """Test that first match preserves packet types.""" + packets = [PacketType("data1"), [1, 2, 3], {"key": "value"}, 42] + tags = ["str", "list", "dict", "int"] + + def is_list(packet, tag): + return isinstance(packet, list) + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(is_list) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == [1, 2, 3] + assert tag == "list" + assert isinstance(packet, list) + + def test_first_match_predicate_exception(self, sample_packets, sample_tags): + """Test first match when predicate raises exception.""" + + def error_predicate(packet, tag): + if packet == sample_packets[1]: # Error on second packet + raise ValueError("Predicate error") + return packet == sample_packets[2] # Match third packet + + stream = SyncStreamFromLists(sample_packets, sample_tags) + first_match = FirstMatch(error_predicate) + result_stream = first_match(stream) + + # The behavior here depends on implementation + # It might propagate the exception or skip the problematic packet + with pytest.raises(ValueError): + list(result_stream) + + def test_first_match_with_generator_stream(self): + """Test first match with generator-based stream.""" + + def packet_generator(): + for i in range(10): + yield f"packet_{i}", f"tag_{i}" + + from orcabridge.stream import SyncStreamFromGenerator + + stream = SyncStreamFromGenerator(packet_generator()) + + def find_packet_5(packet, tag): + return packet == "packet_5" + + first_match = FirstMatch(find_packet_5) + result_stream = first_match(stream) + + result = list(result_stream) + assert len(result) == 1 + packet, tag = result[0] + assert packet == "packet_5" + assert tag == "tag_5" + + def test_first_match_early_termination(self): + """Test that first match terminates early and doesn't process remaining packets.""" + processed_packets = [] + + def tracking_predicate(packet, tag): + processed_packets.append(packet) + return packet == "target" + + packets = ["a", "b", "target", "c", "d"] + tags = ["tag1", "tag2", "tag3", "tag4", "tag5"] + + stream = SyncStreamFromLists(packets, tags) + first_match = FirstMatch(tracking_predicate) + result_stream = first_match(stream) + + result = list(result_stream) + + # Should have found the target + assert len(result) == 1 + assert result[0][0] == "target" + + # Should have stopped processing after finding the target + assert processed_packets == ["a", "b", "target"] + + def test_first_match_pickle(self): + """Test that FirstMatch mapper is pickleable.""" + import pickle + from orcabridge.mappers import FirstMatch + + first_match = FirstMatch() + pickled = pickle.dumps(first_match) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, FirstMatch) + assert unpickled.__class__.__name__ == "FirstMatch" diff --git a/tests/test_streams_operations/test_mappers/test_group_by.py 
b/tests/test_streams_operations/test_mappers/test_group_by.py new file mode 100644 index 0000000..1594498 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_group_by.py @@ -0,0 +1,298 @@ +"""Tests for GroupBy mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import GroupBy +from orcabridge.streams import SyncStreamFromLists + + +class TestGroupBy: + """Test cases for GroupBy mapper.""" + + def test_group_by_basic(self): + """Test basic groupby functionality.""" + tags = [ + {"category": "A", "id": "1"}, + {"category": "B", "id": "2"}, + {"category": "A", "id": "3"}, + {"category": "B", "id": "4"}, + ] + packets = [ + {"value": "data/item1.txt", "name": "metadata/item1.json"}, + {"value": "data/item2.txt", "name": "metadata/item2.json"}, + {"value": "data/item3.txt", "name": "metadata/item3.json"}, + {"value": "data/item4.txt", "name": "metadata/item4.json"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have 2 groups (A and B) + assert len(results) == 2 + + # Check that all groups are present + categories_found = [] + for tag, _ in results: + categories_found.extend(tag["category"]) + categories = set(categories_found) + assert categories == {"A", "B"} + + # Check grouped data structure + # With reduce_keys=False (default), everything should be lists including group keys + for tag, packet in results: + if tag["category"] == ["A", "A"]: # Group key is also a list + assert tag["id"] == ["1", "3"] # IDs for category A + assert packet["value"] == [ + "data/item1.txt", + "data/item3.txt", + ] # Values for category A + assert packet["name"] == ["metadata/item1.json", "metadata/item3.json"] + elif tag["category"] == ["B", "B"]: # Group key is also a list + assert tag["id"] == ["2", "4"] # IDs for category B + assert packet["value"] == [ + "data/item2.txt", + "data/item4.txt", + ] # Values for category B + assert packet["name"] == ["metadata/item2.json", "metadata/item4.json"] + + def test_group_by_reduce_keys(self): + """Test groupby with reduce_keys=True.""" + tags = [ + {"category": "A", "id": "1", "extra": "x1"}, + {"category": "A", "id": "2", "extra": "x2"}, + {"category": "B", "id": "3", "extra": "x3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"], reduce_keys=True) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + for tag, packet in results: + if tag["category"] == "A": + # With reduce_keys=True, group keys become singular values + assert tag["category"] == "A" + # Non-group keys become lists + assert tag["id"] == ["1", "2"] + assert tag["extra"] == ["x1", "x2"] + elif tag["category"] == "B": + assert tag["category"] == "B" + assert tag["id"] == ["3"] + assert tag["extra"] == ["x3"] + + def test_group_by_no_group_keys(self): + """Test groupby without specifying group_keys (uses all tag keys).""" + tags = [ + {"category": "A", "id": "1"}, + {"category": "A", "id": "1"}, # Duplicate + {"category": "B", "id": "2"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy() # No group_keys specified + grouped_stream = group_by(stream) + + results = 
list(grouped_stream) + + # Should group by all tag keys (category, id) + assert len(results) == 2 # (A,1) and (B,2) + + # Extract group keys, accounting for lists in the results + group_keys = set() + for tag, _ in results: + # When reduce_keys=False, all values are lists + category_list = tag["category"] + id_list = tag["id"] + # Since this groups by exact matches, each group should have same values + # We'll take the first value from each list to represent the group + group_keys.add((category_list[0], id_list[0])) + assert group_keys == {("A", "1"), ("B", "2")} + + def test_group_by_with_selection_function(self): + """Test groupby with selection function.""" + tags = [ + {"category": "A", "priority": "1"}, + {"category": "A", "priority": "2"}, + {"category": "A", "priority": "3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + # Selection function that only keeps items with priority >= 2 + def select_high_priority(grouped_items): + return [int(tag["priority"]) >= 2 for tag, packet in grouped_items] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy( + group_keys=["category"], selection_function=select_high_priority + ) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + assert len(results) == 1 + tag, packet = results[0] + + # Should only have priority 2 and 3 items + assert tag["priority"] == ["2", "3"] + assert packet["value"] == ["data/item2.txt", "data/item3.txt"] + + def test_group_by_empty_stream(self): + """Test groupby with empty stream.""" + stream = SyncStreamFromLists( + tags=[], packets=[], tag_keys=["category", "id"], packet_keys=["value"] + ) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + assert len(results) == 0 + + def test_group_by_single_item(self): + """Test groupby with single item.""" + tags = [{"category": "A", "id": "1"}] + packets = [{"value": "data/item1.txt"}] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + assert len(results) == 1 + tag, packet = results[0] + assert tag["category"] == [ + "A" + ] # With reduce_keys=False, even single values become lists + assert tag["id"] == ["1"] + assert packet["value"] == ["data/item1.txt"] + + def test_group_by_missing_group_keys(self): + """Test groupby when some items don't have the group keys.""" + tags = [ + {"category": "A", "id": "1"}, + {"id": "2"}, # Missing category + {"category": "A", "id": "3"}, + ] + packets = [ + {"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + {"value": "data/item3.txt"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"]) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have 2 groups: category="A" and category=None + assert len(results) == 2 + + categories = set() + for tag, _ in results: + # When reduce_keys=False, all values are lists + category_list = tag.get("category", [None]) + if category_list and category_list != [None]: + categories.add(category_list[0]) + else: + categories.add(None) + assert categories == {"A", None} + + def test_group_by_selection_function_filters_all(self): + """Test groupby where selection function filters out all items.""" + tags = [ + {"category": "A", "priority": "1"}, + {"category": "A", "priority": "2"}, + ] + packets = [ + 
{"value": "data/item1.txt"}, + {"value": "data/item2.txt"}, + ] + + # Selection function that filters out everything + def select_none(grouped_items): + return [False] * len(grouped_items) + + stream = SyncStreamFromLists(tags=tags, packets=packets) + group_by = GroupBy(group_keys=["category"], selection_function=select_none) + grouped_stream = group_by(stream) + + results = list(grouped_stream) + + # Should have no results since everything was filtered out + assert len(results) == 0 + + def test_group_by_multiple_streams_error(self): + """Test that GroupBy raises error with multiple streams.""" + stream1 = SyncStreamFromLists(tags=[{"a": "1"}], packets=[{"b": "file.txt"}]) + stream2 = SyncStreamFromLists(tags=[{"c": "3"}], packets=[{"d": "file2.txt"}]) + + group_by = GroupBy(group_keys=["a"]) + + with pytest.raises(ValueError, match="exactly one stream"): + list(group_by(stream1, stream2)) + + def test_group_by_pickle(self): + """Test that GroupBy mapper is pickleable.""" + # Test basic GroupBy + group_by = GroupBy(group_keys=["category"]) + pickled = pickle.dumps(group_by) + unpickled = pickle.loads(pickled) + + assert unpickled.group_keys == group_by.group_keys + assert unpickled.reduce_keys == group_by.reduce_keys + assert unpickled.selection_function == group_by.selection_function + + # Test with reduce_keys + group_by_reduce = GroupBy(group_keys=["category"], reduce_keys=True) + pickled_reduce = pickle.dumps(group_by_reduce) + unpickled_reduce = pickle.loads(pickled_reduce) + + assert unpickled_reduce.group_keys == group_by_reduce.group_keys + assert unpickled_reduce.reduce_keys == group_by_reduce.reduce_keys + + def test_group_by_identity_structure(self): + """Test GroupBy identity_structure method.""" + stream = SyncStreamFromLists(tags=[{"a": "1"}], packets=[{"b": "file.txt"}]) + + # Test without selection function + group_by1 = GroupBy(group_keys=["category"]) + structure1 = group_by1.identity_structure(stream) + assert structure1[0] == "GroupBy" + assert structure1[1] == ["category"] + assert not structure1[2] # reduce_keys + + # Test with reduce_keys + group_by2 = GroupBy(group_keys=["category"], reduce_keys=True) + structure2 = group_by2.identity_structure(stream) + assert structure2[2] # reduce_keys + + # Different group_keys should have different structures + group_by3 = GroupBy(group_keys=["other"]) + structure3 = group_by3.identity_structure(stream) + assert structure1 != structure3 + + def test_group_by_repr(self): + """Test GroupBy string representation.""" + group_by = GroupBy(group_keys=["category"], reduce_keys=True) + repr_str = repr(group_by) + # Should contain class name and key parameters + assert "GroupBy" in repr_str diff --git a/tests/test_streams_operations/test_mappers/test_join.py b/tests/test_streams_operations/test_mappers/test_join.py new file mode 100644 index 0000000..7b60571 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_join.py @@ -0,0 +1,198 @@ +"""Tests for Join mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import Join +from orcabridge.streams import SyncStreamFromLists + + +class TestJoin: + """Test cases for Join mapper.""" + + def test_join_basic(self, sample_packets, sample_tags): + """Test basic join functionality.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + join = Join() + joined_stream = join(stream) + + # Join should collect all packets into a single packet + packets = list(joined_stream) + + assert len(packets) == 1 + joined_packet, joined_tag = packets[0] + + # 
The joined packet should contain all original packets + assert len(joined_packet) == len(sample_packets) + assert list(joined_packet) == sample_packets + + def test_join_empty_stream(self): + """Test join with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + join = Join() + joined_stream = join(empty_stream) + + packets = list(joined_stream) + + assert len(packets) == 1 + joined_packet, _ = packets[0] + assert len(joined_packet) == 0 + assert list(joined_packet) == [] + + def test_join_single_packet(self): + """Test join with single packet stream.""" + packets = ["single_packet"] + tags = ["single_tag"] + stream = SyncStreamFromLists(packets, tags) + + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, joined_tag = result[0] + assert len(joined_packet) == 1 + assert list(joined_packet) == ["single_packet"] + + def test_join_preserves_packet_types(self): + """Test that join preserves different packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 5 + + joined_list = list(joined_packet) + assert joined_list[0] == PacketType("data1") + assert joined_list[1] == {"key": "value"} + assert joined_list[2] == [1, 2, 3] + assert joined_list[3] == 42 + assert joined_list[4] == "string" + + def test_join_maintains_order(self): + """Test that join maintains packet order.""" + packets = [f"packet_{i}" for i in range(10)] + tags = [f"tag_{i}" for i in range(10)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + assert list(joined_packet) == packets + + def test_join_tag_handling(self, sample_packets, sample_tags): + """Test how join handles tags.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + _, joined_tag = result[0] + + # The joined tag should be a collection of original tags + # (implementation-specific behavior) + assert joined_tag is not None + + def test_join_large_stream(self): + """Test join with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 1000 + assert list(joined_packet) == packets + + def test_join_nested_structures(self): + """Test join with nested data structures.""" + packets = [{"nested": {"data": 1}}, [1, [2, 3], 4], ((1, 2), (3, 4))] + tags = ["dict", "list", "tuple"] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + joined_list = list(joined_packet) + assert joined_list[0] == {"nested": {"data": 1}} + assert joined_list[1] == [1, [2, 3], 4] + assert joined_list[2] == ((1, 2), (3, 4)) + + def test_join_with_none_packets(self): + """Test join with None packets.""" + packets = ["data1", None, "data2", None] + tags = ["tag1", "tag2", "tag3", "tag4"] + + stream = SyncStreamFromLists(packets, tags) + join 
= Join() + joined_stream = join(stream) + + result = list(joined_stream) + joined_packet, _ = result[0] + + joined_list = list(joined_packet) + assert joined_list == ["data1", None, "data2", None] + + def test_join_chaining(self, sample_packets, sample_tags): + """Test chaining join operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First join + join1 = Join() + joined_stream1 = join1(stream) + + # Second join (should join the already joined result) + join2 = Join() + joined_stream2 = join2(joined_stream1) + + result = list(joined_stream2) + assert len(result) == 1 + + # The result should be a packet containing one element (the previous join result) + final_packet, _ = result[0] + assert len(final_packet) == 1 + + def test_join_memory_efficiency(self): + """Test that join doesn't consume excessive memory for large streams.""" + # This is more of a performance test, but we can check basic functionality + packets = [f"packet_{i}" for i in range(10000)] + tags = [f"tag_{i}" for i in range(10000)] + + stream = SyncStreamFromLists(packets, tags) + join = Join() + joined_stream = join(stream) + + # Just verify it completes without issues + result = list(joined_stream) + assert len(result) == 1 + + joined_packet, _ = result[0] + assert len(joined_packet) == 10000 + + def test_join_pickle(self): + """Test that Join mapper is pickleable.""" + join = Join() + pickled = pickle.dumps(join) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Join) + assert unpickled.__class__.__name__ == "Join" diff --git a/tests/test_streams_operations/test_mappers/test_map_packets.py b/tests/test_streams_operations/test_mappers/test_map_packets.py new file mode 100644 index 0000000..da278de --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_map_packets.py @@ -0,0 +1,273 @@ +"""Tests for MapPackets mapper functionality.""" + +import pytest +from orcabridge.base import PacketType +from orcabridge.mapper import MapPackets +from orcabridge.stream import SyncStreamFromLists + + +class TestMapPackets: + """Test cases for MapPackets mapper.""" + + def test_map_packets_basic(self, sample_packets, sample_tags): + """Test basic map packets functionality.""" + + def add_suffix(packet): + return f"{packet}_mapped" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(add_suffix) + mapped_stream = map_packets(stream) + + result_packets = [] + result_tags = [] + for packet, tag in mapped_stream: + result_packets.append(packet) + result_tags.append(tag) + + # Packets should be transformed, tags unchanged + expected_packets = [f"{p}_mapped" for p in sample_packets] + assert result_packets == expected_packets + assert result_tags == sample_tags + + def test_map_packets_numeric_transformation(self): + """Test map packets with numeric transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + def square(packet): + return packet**2 + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(square) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [1, 4, 9, 16, 25] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == tags + + def test_map_packets_type_conversion(self): + """Test map packets with type conversion.""" + packets = ["1", "2", "3", "4"] + tags = ["str1", "str2", "str3", 
"str4"] + + def str_to_int(packet): + return int(packet) + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(str_to_int) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [1, 2, 3, 4] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + assert all(isinstance(p, int) for p in actual_packets) + + def test_map_packets_complex_transformation(self): + """Test map packets with complex data transformation.""" + packets = [ + {"name": "alice", "age": 25}, + {"name": "bob", "age": 30}, + {"name": "charlie", "age": 35}, + ] + tags = ["person1", "person2", "person3"] + + def create_description(packet): + return f"{packet['name']} is {packet['age']} years old" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(create_description) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [ + "alice is 25 years old", + "bob is 30 years old", + "charlie is 35 years old", + ] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_identity_function(self, sample_packets, sample_tags): + """Test map packets with identity function.""" + + def identity(packet): + return packet + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(identity) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_map_packets_empty_stream(self): + """Test map packets with empty stream.""" + + def dummy_transform(packet): + return packet * 2 + + empty_stream = SyncStreamFromLists([], []) + map_packets = MapPackets(dummy_transform) + mapped_stream = map_packets(empty_stream) + + result = list(mapped_stream) + assert len(result) == 0 + + def test_map_packets_with_none_values(self): + """Test map packets with None values.""" + packets = [1, None, 3, None, 5] + tags = ["num1", "null1", "num3", "null2", "num5"] + + def handle_none(packet): + return 0 if packet is None else packet * 2 + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(handle_none) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [2, 0, 6, 0, 10] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_exception_handling(self): + """Test map packets when transformation function raises exception.""" + packets = [1, 2, "invalid", 4] + tags = ["num1", "num2", "str1", "num4"] + + def divide_by_packet(packet): + return 10 / packet # Will fail on "invalid" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(divide_by_packet) + mapped_stream = map_packets(stream) + + # Should raise exception when processing "invalid" + with pytest.raises(TypeError): + list(mapped_stream) + + def test_map_packets_preserves_order(self): + """Test that map packets preserves packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def add_prefix(packet): + return f"mapped_{packet}" + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(add_prefix) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [f"mapped_packet_{i}" for i in range(100)] + actual_packets 
= [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_with_lambda(self, sample_packets, sample_tags): + """Test map packets with lambda function.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_packets = MapPackets(lambda x: f"λ({x})") + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [f"λ({p})" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_chaining(self, sample_packets, sample_tags): + """Test chaining multiple map packets operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First transformation + map1 = MapPackets(lambda x: f"first_{x}") + stream1 = map1(stream) + + # Second transformation + map2 = MapPackets(lambda x: f"second_{x}") + stream2 = map2(stream1) + + result = list(stream2) + + expected_packets = [f"second_first_{p}" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_with_packet_type(self): + """Test map packets with PacketType objects.""" + packets = [PacketType("data1"), PacketType("data2")] + tags = ["type1", "type2"] + + def extract_data(packet): + return packet.data if hasattr(packet, "data") else str(packet) + + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(extract_data) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + actual_packets = [packet for packet, _ in result] + + # Should extract string representation or data + assert len(actual_packets) == 2 + assert all(isinstance(p, str) for p in actual_packets) + + def test_map_packets_stateful_transformation(self): + """Test map packets with stateful transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + + class Counter: + def __init__(self): + self.count = 0 + + def transform(self, packet): + self.count += 1 + return (packet, self.count) + + counter = Counter() + stream = SyncStreamFromLists(packets, tags) + map_packets = MapPackets(counter.transform) + mapped_stream = map_packets(stream) + + result = list(mapped_stream) + + expected_packets = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)] + actual_packets = [packet for packet, _ in result] + + assert actual_packets == expected_packets + + def test_map_packets_pickle(self): + """Test that MapPackets mapper is pickleable.""" + import pickle + from orcabridge.mappers import MapPackets + + # MapPackets takes a key mapping, not a transformation function + key_map = {"old_key": "new_key", "data": "value"} + map_packets = MapPackets(key_map) + pickled = pickle.dumps(map_packets) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, MapPackets) + assert unpickled.key_map == map_packets.key_map diff --git a/tests/test_streams_operations/test_mappers/test_map_tags.py b/tests/test_streams_operations/test_mappers/test_map_tags.py new file mode 100644 index 0000000..a8e185a --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_map_tags.py @@ -0,0 +1,330 @@ +"""Tests for MapTags mapper functionality.""" + + import pytest + from orcabridge.base import PacketType + from orcabridge.mapper import MapTags + from orcabridge.stream import SyncStreamFromLists + + + class TestMapTags: + """Test cases for MapTags mapper.""" + + def test_map_tags_basic(self, sample_packets, sample_tags): + """Test basic map tags 
functionality.""" + + def add_prefix(tag): + return f"mapped_{tag}" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(add_prefix) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"mapped_{t}" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + # Packets should be unchanged, tags transformed + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_map_tags_type_conversion(self, sample_packets): + """Test map tags with type conversion.""" + tags = ["1", "2", "3"] + + def str_to_int(tag): + return int(tag) + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(str_to_int) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [1, 2, 3] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + assert all(isinstance(t, int) for t in actual_tags) + + def test_map_tags_complex_transformation(self): + """Test map tags with complex transformation.""" + packets = ["data1", "data2", "data3"] + tags = [ + {"type": "string", "length": 5}, + {"type": "string", "length": 5}, + {"type": "string", "length": 5}, + ] + + def extract_type(tag): + if isinstance(tag, dict): + return tag.get("type", "unknown") + return str(tag) + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(extract_type) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["string", "string", "string"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_identity_function(self, sample_packets, sample_tags): + """Test map tags with identity function.""" + + def identity(tag): + return tag + + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(identity) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == sample_tags + + def test_map_tags_empty_stream(self): + """Test map tags with empty stream.""" + + def dummy_transform(tag): + return f"transformed_{tag}" + + empty_stream = SyncStreamFromLists([], []) + map_tags = MapTags(dummy_transform) + mapped_stream = map_tags(empty_stream) + + result = list(mapped_stream) + assert len(result) == 0 + + def test_map_tags_with_none_values(self, sample_packets): + """Test map tags with None values.""" + tags = ["tag1", None, "tag3"] + + def handle_none(tag): + return "NULL_TAG" if tag is None else tag.upper() + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(handle_none) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["TAG1", "NULL_TAG", "TAG3"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_exception_handling(self, sample_packets): + """Test map tags when transformation function raises exception.""" + tags = ["valid", "also_valid", 123] # 123 will cause error in upper() + + def to_upper(tag): + return tag.upper() # Will fail on integer + + stream = SyncStreamFromLists(sample_packets, tags) + map_tags = MapTags(to_upper) + mapped_stream = map_tags(stream) + + # Should raise exception when processing integer tag + with pytest.raises(AttributeError): + list(mapped_stream) + + def test_map_tags_preserves_packets(self): + 
"""Test that map tags preserves all packet types.""" + packets = [PacketType("data1"), {"key": "value"}, [1, 2, 3], 42, "string"] + tags = ["type1", "type2", "type3", "type4", "type5"] + + def add_suffix(tag): + return f"{tag}_processed" + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(add_suffix) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + expected_tags = [f"{t}_processed" for t in tags] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_map_tags_maintains_order(self): + """Test that map tags maintains packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def reverse_tag(tag): + return tag[::-1] # Reverse the string + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(reverse_tag) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"{i}_gat" for i in range(100)] # "tag_i" reversed + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == packets + assert actual_tags == expected_tags + + def test_map_tags_with_lambda(self, sample_packets, sample_tags): + """Test map tags with lambda function.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + map_tags = MapTags(lambda t: f"λ({t})") + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = [f"λ({t})" for t in sample_tags] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_chaining(self, sample_packets, sample_tags): + """Test chaining multiple map tags operations.""" + stream = SyncStreamFromLists(sample_packets, sample_tags) + + # First transformation + map1 = MapTags(lambda t: f"first_{t}") + stream1 = map1(stream) + + # Second transformation + map2 = MapTags(lambda t: f"second_{t}") + stream2 = map2(stream1) + + result = list(stream2) + + expected_tags = [f"second_first_{t}" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_map_tags_stateful_transformation(self): + """Test map tags with stateful transformation.""" + packets = ["a", "b", "c", "d", "e"] + tags = ["tag1", "tag2", "tag3", "tag4", "tag5"] + + class TagCounter: + def __init__(self): + self.count = 0 + + def transform(self, tag): + self.count += 1 + return f"{tag}_#{self.count}" + + counter = TagCounter() + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(counter.transform) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["tag1_#1", "tag2_#2", "tag3_#3", "tag4_#4", "tag5_#5"] + actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_with_complex_types(self): + """Test map tags with complex tag types.""" + packets = ["data1", "data2", "data3"] + tags = [ + {"id": 1, "category": "A"}, + {"id": 2, "category": "B"}, + {"id": 3, "category": "A"}, + ] + + def extract_category(tag): + if isinstance(tag, dict): + return f"cat_{tag.get('category', 'unknown')}" + return str(tag) + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(extract_category) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + expected_tags = ["cat_A", "cat_B", "cat_A"] + 
actual_tags = [tag for _, tag in result] + + assert actual_tags == expected_tags + + def test_map_tags_preserves_tag_references(self): + """Test that map tags doesn't break tag references when not needed.""" + packets = ["data1", "data2"] + shared_tag = {"shared": "reference"} + tags = [shared_tag, shared_tag] + + def conditional_transform(tag): + # Only transform if it's a string + if isinstance(tag, str): + return f"transformed_{tag}" + return tag # Keep dict unchanged + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(conditional_transform) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_tags = [tag for _, tag in result] + + # Both tags should still reference the same object + assert actual_tags[0] is shared_tag + assert actual_tags[1] is shared_tag + assert actual_tags[0] is actual_tags[1] + + def test_map_tags_large_stream(self): + """Test map tags with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + + def add_hash(tag): + return f"{tag}_{hash(tag) % 1000}" + + stream = SyncStreamFromLists(packets, tags) + map_tags = MapTags(add_hash) + mapped_stream = map_tags(stream) + + result = list(mapped_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 1000 + assert len(actual_tags) == 1000 + assert actual_packets == packets + + # All tags should have been transformed + assert all( + "_" in tag and tag != f"tag_{i}" for i, tag in enumerate(actual_tags) + ) + + def test_map_tags_pickle(self): + """Test that MapTags mapper is pickleable.""" + import pickle + from orcabridge.mappers import MapTags + + # MapTags takes a key mapping, not a transformation function + key_map = {"old_tag": "new_tag", "category": "type"} + map_tags = MapTags(key_map) + pickled = pickle.dumps(map_tags) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, MapTags) + assert unpickled.key_map == map_tags.key_map diff --git a/tests/test_streams_operations/test_mappers/test_merge.py b/tests/test_streams_operations/test_mappers/test_merge.py new file mode 100644 index 0000000..fb4c655 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_merge.py @@ -0,0 +1,208 @@ +"""Tests for Merge mapper functionality.""" + +import pickle +import pytest +from orcabridge.base import PacketType +from orcabridge.mappers import Merge +from orcabridge.streams import SyncStreamFromLists + + +class TestMerge: + """Test cases for Merge mapper.""" + + def test_merge_two_streams(self, sample_packets, sample_tags): + """Test merging two streams.""" + # Create two streams + stream1 = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2 = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + # Should contain all packets from both streams + assert len(packets) == 3 + assert set(packets) == set(sample_packets) + assert set(tags) == set(sample_tags) + + def test_merge_multiple_streams(self, sample_packets, sample_tags): + """Test merging multiple streams.""" + # Create three streams with one packet each + streams = [] + for i in range(3): + stream = SyncStreamFromLists([sample_packets[i]], [sample_tags[i]]) + streams.append(stream) + + merge = Merge() + merged_stream = merge(*streams) + + packets 
= [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + assert len(packets) == 3 + assert set(packets) == set(sample_packets) + assert set(tags) == set(sample_tags) + + def test_merge_empty_streams(self): + """Test merging with empty streams.""" + empty1 = SyncStreamFromLists([], []) + empty2 = SyncStreamFromLists([], []) + + merge = Merge() + merged_stream = merge(empty1, empty2) + + packets = list(merged_stream) + assert len(packets) == 0 + + def test_merge_one_empty_one_full(self, sample_stream): + """Test merging empty stream with full stream.""" + empty_stream = SyncStreamFromLists([], []) + + merge = Merge() + merged_stream = merge(sample_stream, empty_stream) + + packets = list(merged_stream) + original_packets = list(sample_stream) + + assert len(packets) == len(original_packets) + # Order might be different, so check sets + assert set(packets) == set(original_packets) + + def test_merge_different_lengths(self): + """Test merging streams of different lengths.""" + packets1 = ["a", "b"] + tags1 = ["tag1", "tag2"] + packets2 = ["c", "d", "e", "f"] + tags2 = ["tag3", "tag4", "tag5", "tag6"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + tags = [] + for packet, tag in merged_stream: + packets.append(packet) + tags.append(tag) + + assert len(packets) == 6 + assert set(packets) == set(packets1 + packets2) + assert set(tags) == set(tags1 + tags2) + + def test_merge_single_stream(self, sample_stream): + """Test merge with single stream.""" + merge = Merge() + merged_stream = merge(sample_stream) + + packets = list(merged_stream) + original_packets = list(sample_stream) + + assert packets == original_packets + + def test_merge_preserves_packet_types(self): + """Test that merge preserves different packet types.""" + packets1 = [PacketType("data1"), {"key1": "value1"}] + tags1 = ["str1", "dict1"] + packets2 = [[1, 2], 42] + tags2 = ["list1", "int1"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + result_packets = [] + for packet, _ in merged_stream: + result_packets.append(packet) + + assert len(result_packets) == 4 + assert set(result_packets) == set(packets1 + packets2) + + def test_merge_order_independence(self, sample_packets, sample_tags): + """Test that merge order doesn't affect final result set.""" + stream1 = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2 = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + + merge = Merge() + + # Merge in one order + merged1 = merge(stream1, stream2) + packets1 = set(p for p, _ in merged1) + + # Merge in reverse order (need to recreate streams) + stream1_new = SyncStreamFromLists(sample_packets[:2], sample_tags[:2]) + stream2_new = SyncStreamFromLists(sample_packets[2:], sample_tags[2:]) + merged2 = merge(stream2_new, stream1_new) + packets2 = set(p for p, _ in merged2) + + assert packets1 == packets2 + + def test_merge_with_duplicate_packets(self): + """Test merging streams with duplicate packets.""" + packets1 = ["a", "b"] + tags1 = ["tag1", "tag2"] + packets2 = ["a", "c"] # "a" appears in both streams + tags2 = ["tag3", "tag4"] + + stream1 = SyncStreamFromLists(packets1, tags1) + stream2 = SyncStreamFromLists(packets2, tags2) + + merge = Merge() + merged_stream = merge(stream1, stream2) + + packets = [] + for packet, _ in 
merged_stream: + packets.append(packet) + + # Should include duplicates + assert len(packets) == 4 + assert packets.count("a") == 2 + assert "b" in packets + assert "c" in packets + + def test_merge_no_streams_error(self): + """Test that merge with no streams raises an error.""" + merge = Merge() + + with pytest.raises(TypeError): + merge() + + def test_merge_large_number_of_streams(self): + """Test merging a large number of streams.""" + streams = [] + all_packets = [] + + for i in range(10): + packets = [f"packet_{i}"] + tags = [f"tag_{i}"] + streams.append(SyncStreamFromLists(packets, tags)) + all_packets.extend(packets) + + merge = Merge() + merged_stream = merge(*streams) + + result_packets = [] + for packet, _ in merged_stream: + result_packets.append(packet) + + assert len(result_packets) == 10 + assert set(result_packets) == set(all_packets) def test_merge_pickle(self): + """Test that Merge mapper is pickleable.""" + merge = Merge() + pickled = pickle.dumps(merge) + unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Merge) + assert unpickled.__class__.__name__ == "Merge" diff --git a/tests/test_streams_operations/test_mappers/test_repeat.py b/tests/test_streams_operations/test_mappers/test_repeat.py new file mode 100644 index 0000000..b8a4a98 --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_repeat.py @@ -0,0 +1,186 @@ +"""Tests for Repeat mapper functionality.""" + +import pytest +import pickle +from orcabridge.mappers import Repeat + + +class TestRepeat: + """Test cases for Repeat mapper.""" + + def test_repeat_basic(self, sample_stream): + """Test basic repeat functionality.""" + repeat = Repeat(3) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + + # Should have 3 times the original packets + assert len(packets) == 9 # 3 original * 3 repeats + + # Check that each packet appears 3 times consecutively + original_packets = list(sample_stream) + expected_packets = [] + for packet in original_packets: + expected_packets.extend([packet] * 3) + + assert packets == expected_packets + + def test_repeat_zero(self, sample_stream): + """Test repeat with count 0.""" + repeat = Repeat(0) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + assert len(packets) == 0 + + def test_repeat_one(self, sample_stream): + """Test repeat with count 1.""" + repeat = Repeat(1) + repeated_stream = repeat(sample_stream) + + packets = list(repeated_stream) + original_packets = list(sample_stream) + + assert packets == original_packets + + def test_repeat_with_tags(self, sample_packets, sample_tags): + """Test repeat preserves tags correctly.""" + from orcabridge.streams import SyncStreamFromLists + + stream = SyncStreamFromLists(tags=sample_tags, packets=sample_packets) + repeat = Repeat(2) + repeated_stream = repeat(stream) + + packets = [] + tags = [] + for tag, packet in repeated_stream: + packets.append(packet) + tags.append(tag) + + # Each packet should appear twice with its corresponding tag + assert len(packets) == 6 # 3 original * 2 repeats + assert len(tags) == 6 + + # Check pattern: [p1,p1,p2,p2,p3,p3] with [t1,t1,t2,t2,t3,t3] + expected_packets = [] + expected_tags = [] + for p, t in zip(sample_packets, sample_tags): + expected_packets.extend([p, p]) + expected_tags.extend([t, t]) + + assert packets == expected_packets + assert tags == expected_tags + + def test_repeat_with_empty_stream(self): + """Test repeat with empty stream.""" + from orcabridge.streams 
import SyncStreamFromLists
+
+        empty_stream = SyncStreamFromLists(tags=[], packets=[])
+        repeat = Repeat(5)
+        repeated_stream = repeat(empty_stream)
+
+        packets = list(repeated_stream)
+        assert len(packets) == 0
+
+    def test_repeat_large_count(self, sample_stream):
+        """Test repeat with large count."""
+        repeat = Repeat(100)
+        repeated_stream = repeat(sample_stream)
+
+        packets = list(repeated_stream)
+        assert len(packets) == 300  # 3 original * 100 repeats
+
+    def test_repeat_negative_count(self):
+        """Test repeat with negative count raises error."""
+        with pytest.raises(ValueError):
+            Repeat(-1)
+
+    def test_repeat_non_integer_count(self):
+        """Test repeat with non-integer count."""
+        with pytest.raises(TypeError):
+            Repeat(3.5)
+
+        with pytest.raises(TypeError):
+            Repeat("3")
+
+    def test_repeat_preserves_packet_types(self, sample_stream):
+        """Test that repeat preserves different packet types."""
+        # Create stream with mixed packet types
+        from orcabridge.streams import SyncStreamFromLists
+
+        packets = [
+            {"data": "data1"},
+            {"key": "value"},
+            {"items": ["a", "b", "c"]},
+            {"number": "42"},
+        ]
+        tags = [{"type": "str"}, {"type": "dict"}, {"type": "list"}, {"type": "int"}]
+
+        stream = SyncStreamFromLists(tags=tags, packets=packets)
+        repeat = Repeat(2)
+        repeated_stream = repeat(stream)
+
+        result_packets = []
+        for tag, packet in repeated_stream:
+            result_packets.append(packet)
+
+        expected = [
+            {"data": "data1"},
+            {"data": "data1"},
+            {"key": "value"},
+            {"key": "value"},
+            {"items": ["a", "b", "c"]},
+            {"items": ["a", "b", "c"]},
+            {"number": "42"},
+            {"number": "42"},
+        ]
+
+        assert result_packets == expected
+
+    def test_repeat_chaining(self, sample_stream):
+        """Test chaining multiple repeat operations."""
+        repeat1 = Repeat(2)
+        repeat2 = Repeat(3)
+
+        # Apply first repeat
+        stream1 = repeat1(sample_stream)
+        # Apply second repeat
+        stream2 = repeat2(stream1)
+
+        packets = list(stream2)
+
+        # Should have 3 original * 2 * 3 = 18 packets
+        assert len(packets) == 18
+
+        # Each original packet should appear 6 times consecutively
+        original_packets = list(sample_stream)
+        expected = []
+        for packet in original_packets:
+            expected.extend([packet] * 6)
+
+        assert packets == expected
+
+    def test_repeat_pickle(self):
+        """Test that Repeat mapper is pickleable."""
+        repeat = Repeat(5)
+
+        # Test pickle/unpickle
+        pickled = pickle.dumps(repeat)
+        unpickled = pickle.loads(pickled)
+
+        # Verify the unpickled mapper has the same properties
+        assert unpickled.repeat_count == repeat.repeat_count
+
+        # Test that the unpickled mapper works correctly
+        from orcabridge.streams import SyncStreamFromLists
+
+        tags = [{"id": "1"}, {"id": "2"}]
+        packets = [{"data": "file1.txt"}, {"data": "file2.txt"}]
+        stream = SyncStreamFromLists(tags=tags, packets=packets)
+
+        original_results = list(repeat(stream))
+        unpickled_results = list(unpickled(stream))
+
+        assert original_results == unpickled_results
+        assert len(original_results) == 10  # 2 * 5 repeats
diff --git a/tests/test_streams_operations/test_mappers/test_transform.py b/tests/test_streams_operations/test_mappers/test_transform.py
new file mode 100644
index 0000000..495081e
--- /dev/null
+++ b/tests/test_streams_operations/test_mappers/test_transform.py
@@ -0,0 +1,364 @@
+"""Tests for Transform mapper functionality."""
+
+import pytest
+from orcabridge.base import PacketType
+from orcabridge.mappers import Transform
+from orcabridge.streams import SyncStreamFromLists
+
+
+class TestTransform:
+    """Test cases for Transform mapper."""
+
+    def
test_transform_basic(self, simple_transform): + """Test basic transform functionality.""" + packets = ["hello", "world", "test"] + tags = ["greeting", "noun", "action"] + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(simple_transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = ["HELLO", "WORLD", "TEST"] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == tags # Tags should be preserved + + def test_transform_with_tag_modification(self): + """Test transform that modifies both packet and tag.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + def double_and_prefix_tag(packet, tag): + return packet * 2, f"doubled_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(double_and_prefix_tag) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, 4, 6, 8, 10] + expected_tags = [ + "doubled_num1", + "doubled_num2", + "doubled_num3", + "doubled_num4", + "doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_packet_only(self, sample_packets, sample_tags): + """Test transform that only modifies packets.""" + + def add_prefix(packet, tag): + return f"transformed_{packet}", tag + + stream = SyncStreamFromLists(sample_packets, sample_tags) + transform_mapper = Transform(add_prefix) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [f"transformed_{p}" for p in sample_packets] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == sample_tags + + def test_transform_tag_only(self, sample_packets, sample_tags): + """Test transform that only modifies tags.""" + + def add_tag_suffix(packet, tag): + return packet, f"{tag}_processed" + + stream = SyncStreamFromLists(sample_packets, sample_tags) + transform_mapper = Transform(add_tag_suffix) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_tags = [f"{t}_processed" for t in sample_tags] + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == sample_packets + assert actual_tags == expected_tags + + def test_transform_empty_stream(self): + """Test transform with empty stream.""" + + def dummy_transform(packet, tag): + return packet, tag + + empty_stream = SyncStreamFromLists([], []) + transform_mapper = Transform(dummy_transform) + transformed_stream = transform_mapper(empty_stream) + + result = list(transformed_stream) + assert len(result) == 0 + + def test_transform_type_conversion(self): + """Test transform with type conversion.""" + packets = ["1", "2", "3", "4", "5"] + tags = ["str1", "str2", "str3", "str4", "str5"] + + def str_to_int_with_tag(packet, tag): + return int(packet), f"int_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(str_to_int_with_tag) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [1, 2, 3, 4, 5] + expected_tags = ["int_str1", "int_str2", "int_str3", "int_str4", 
"int_str5"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + assert all(isinstance(p, int) for p in actual_packets) + + def test_transform_complex_data(self): + """Test transform with complex data structures.""" + packets = [ + {"name": "alice", "age": 25}, + {"name": "bob", "age": 30}, + {"name": "charlie", "age": 35}, + ] + tags = ["person1", "person2", "person3"] + + def enrich_person_data(packet, tag): + enriched = packet.copy() + enriched["category"] = "adult" if packet["age"] >= 30 else "young" + return enriched, f"enriched_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(enrich_person_data) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [ + {"name": "alice", "age": 25, "category": "young"}, + {"name": "bob", "age": 30, "category": "adult"}, + {"name": "charlie", "age": 35, "category": "adult"}, + ] + expected_tags = ["enriched_person1", "enriched_person2", "enriched_person3"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_with_none_values(self): + """Test transform with None values.""" + packets = [1, None, 3, None, 5] + tags = ["num1", "null1", "num3", "null2", "num5"] + + def handle_none_transform(packet, tag): + if packet is None: + return "MISSING", f"missing_{tag}" + else: + return packet * 2, f"doubled_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(handle_none_transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, "MISSING", 6, "MISSING", 10] + expected_tags = [ + "doubled_num1", + "missing_null1", + "doubled_num3", + "missing_null2", + "doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_preserves_order(self): + """Test that transform preserves packet order.""" + packets = [f"packet_{i}" for i in range(100)] + tags = [f"tag_{i}" for i in range(100)] + + def add_index(packet, tag): + index = int(packet.split("_")[1]) + return f"indexed_{index}_{packet}", f"indexed_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(add_index) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [f"indexed_{i}_packet_{i}" for i in range(100)] + expected_tags = [f"indexed_tag_{i}" for i in range(100)] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_exception_handling(self): + """Test transform when transformation function raises exception.""" + packets = [1, 2, "invalid", 4] + tags = ["num1", "num2", "str1", "num4"] + + def divide_transform(packet, tag): + return 10 / packet, f"divided_{tag}" # Will fail on "invalid" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(divide_transform) + transformed_stream = transform_mapper(stream) + + # Should raise exception when processing "invalid" + with pytest.raises(TypeError): + list(transformed_stream) + + def 
test_transform_with_lambda(self): + """Test transform with lambda function.""" + packets = [1, 2, 3, 4, 5] + tags = ["a", "b", "c", "d", "e"] + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(lambda p, t: (p**2, t.upper())) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [1, 4, 9, 16, 25] + expected_tags = ["A", "B", "C", "D", "E"] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_chaining(self): + """Test chaining multiple transform operations.""" + packets = [1, 2, 3, 4, 5] + tags = ["num1", "num2", "num3", "num4", "num5"] + + stream = SyncStreamFromLists(packets, tags) + + # First transformation: double the packet + transform1 = Transform(lambda p, t: (p * 2, f"doubled_{t}")) + stream1 = transform1(stream) + + # Second transformation: add 10 to packet + transform2 = Transform(lambda p, t: (p + 10, f"added_{t}")) + stream2 = transform2(stream1) + + result = list(stream2) + + expected_packets = [12, 14, 16, 18, 20] # (original * 2) + 10 + expected_tags = [ + "added_doubled_num1", + "added_doubled_num2", + "added_doubled_num3", + "added_doubled_num4", + "added_doubled_num5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_with_packet_type(self): + """Test transform with PacketType objects.""" + packets = [PacketType("data1"), PacketType("data2")] + tags = ["type1", "type2"] + + def extract_and_modify(packet, tag): + data = str(packet) # Convert to string + return f"extracted_{data}", f"processed_{tag}" + + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(extract_and_modify) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert len(actual_packets) == 2 + assert all("extracted_" in p for p in actual_packets) + assert actual_tags == ["processed_type1", "processed_type2"] + + def test_transform_stateful(self): + """Test transform with stateful transformation.""" + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + + class StatefulTransform: + def __init__(self): + self.counter = 0 + + def transform(self, packet, tag): + self.counter += 1 + return (packet + self.counter, f"{tag}_step_{self.counter}") + + stateful = StatefulTransform() + stream = SyncStreamFromLists(packets, tags) + transform_mapper = Transform(stateful.transform) + transformed_stream = transform_mapper(stream) + + result = list(transformed_stream) + + expected_packets = [2, 4, 6, 8, 10] # packet + step_number + expected_tags = [ + "n1_step_1", + "n2_step_2", + "n3_step_3", + "n4_step_4", + "n5_step_5", + ] + + actual_packets = [packet for packet, _ in result] + actual_tags = [tag for _, tag in result] + + assert actual_packets == expected_packets + assert actual_tags == expected_tags + + def test_transform_pickle(self): + """Test that Transform mapper is pickleable.""" + import pickle + from orcabridge.mappers import Transform + + def add_prefix(tag, packet): + new_tag = {**tag, "prefix": "test"} + new_packet = {**packet, "processed": True} + return new_tag, new_packet + + transform = Transform(add_prefix) + pickled = pickle.dumps(transform) + 
unpickled = pickle.loads(pickled) + + # Test that unpickled mapper works the same + assert isinstance(unpickled, Transform) + assert unpickled.__class__.__name__ == "Transform" diff --git a/tests/test_streams_operations/test_mappers/test_utility_functions.py b/tests/test_streams_operations/test_mappers/test_utility_functions.py new file mode 100644 index 0000000..9cae09e --- /dev/null +++ b/tests/test_streams_operations/test_mappers/test_utility_functions.py @@ -0,0 +1,248 @@ +"""Tests for utility functions tag() and packet().""" + +from orcabridge.mappers import tag, packet +from orcabridge.streams import SyncStreamFromLists + + +class TestUtilityFunctions: + """Test cases for tag() and packet() utility functions.""" + + def test_tag_function_basic(self): + """Test basic tag() function functionality.""" + tags = [ + {"old_key": "value1", "other": "data1"}, + {"old_key": "value2", "other": "data2"}, + ] + packets = [ + {"data": "packet1"}, + {"data": "packet2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({"old_key": "new_key"}) + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 2 + for (result_tag, result_packet), original_packet in zip(results, packets): + # Tag should be transformed + assert "new_key" in result_tag + assert "old_key" not in result_tag # old key dropped by default + assert result_tag["new_key"] in ["value1", "value2"] + + # Packet should be unchanged + assert result_packet == original_packet + + def test_tag_function_keep_unmapped(self): + """Test tag() function with drop_unmapped=False.""" + tags = [ + {"old_key": "value1", "keep_this": "data1"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({"old_key": "new_key"}, drop_unmapped=False) + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have both mapped and unmapped keys + assert result_tag["new_key"] == "value1" + assert result_tag["keep_this"] == "data1" + + def test_packet_function_basic(self): + """Test basic packet() function functionality.""" + tags = [ + {"tag_data": "tag1"}, + {"tag_data": "tag2"}, + ] + packets = [ + {"old_key": "value1", "other": "data1"}, + {"old_key": "value2", "other": "data2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({"old_key": "new_key"}) + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 2 + for (result_tag, result_packet), original_tag in zip(results, tags): + # Tag should be unchanged + assert result_tag == original_tag + + # Packet should be transformed + assert "new_key" in result_packet + assert "old_key" not in result_packet # old key dropped by default + assert result_packet["new_key"] in ["value1", "value2"] + + def test_packet_function_keep_unmapped(self): + """Test packet() function with drop_unmapped=False.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"old_key": "value1", "keep_this": "data1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({"old_key": "new_key"}, drop_unmapped=False) + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have both mapped and unmapped keys + assert result_packet["new_key"] == "value1" + 
assert result_packet["keep_this"] == "data1" + + def test_tag_function_empty_mapping(self): + """Test tag() function with empty mapping.""" + tags = [ + {"key1": "value1", "key2": "value2"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + tag_mapper = tag({}) # Empty mapping + transformed_stream = tag_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # With empty mapping and drop_unmapped=True (default), all keys should be dropped + assert result_tag == {} + assert result_packet == packets[0] # Packet unchanged + + def test_packet_function_empty_mapping(self): + """Test packet() function with empty mapping.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"key1": "value1", "key2": "value2"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + packet_mapper = packet({}) # Empty mapping + transformed_stream = packet_mapper(stream) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # With empty mapping and drop_unmapped=True (default), all keys should be dropped + assert result_tag == tags[0] # Tag unchanged + assert result_packet == {} + + def test_tag_function_chaining(self): + """Test chaining multiple tag() transformations.""" + tags = [ + {"a": "value1", "b": "value2", "c": "value3"}, + ] + packets = [ + {"data": "packet1"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + + # Chain transformations + tag_mapper1 = tag({"a": "new_a"}, drop_unmapped=False) + tag_mapper2 = tag({"b": "new_b"}, drop_unmapped=False) + + transformed_stream = tag_mapper2(tag_mapper1(stream)) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have transformations from both mappers + assert result_tag["new_a"] == "value1" + assert result_tag["new_b"] == "value2" + assert result_tag["c"] == "value3" # Unchanged + + def test_packet_function_chaining(self): + """Test chaining multiple packet() transformations.""" + tags = [ + {"tag_data": "tag1"}, + ] + packets = [ + {"a": "value1", "b": "value2", "c": "value3"}, + ] + + stream = SyncStreamFromLists(tags=tags, packets=packets) + + # Chain transformations + packet_mapper1 = packet({"a": "new_a"}, drop_unmapped=False) + packet_mapper2 = packet({"b": "new_b"}, drop_unmapped=False) + + transformed_stream = packet_mapper2(packet_mapper1(stream)) + + results = list(transformed_stream) + + assert len(results) == 1 + result_tag, result_packet = results[0] + + # Should have transformations from both mappers + assert result_packet["new_a"] == "value1" + assert result_packet["new_b"] == "value2" + assert result_packet["c"] == "value3" # Unchanged + + def test_utility_functions_pickle(self): + """Test that utility functions tag() and packet() are pickleable.""" + import pickle + + # Test tag() function + tag_mapper = tag({"old_key": "new_key"}) + pickled_tag = pickle.dumps(tag_mapper) + unpickled_tag = pickle.loads(pickled_tag) + + # Test that unpickled tag mapper works + assert callable(unpickled_tag) + + # Test packet() function + packet_mapper = packet({"old_key": "new_key"}) + pickled_packet = pickle.dumps(packet_mapper) + unpickled_packet = pickle.loads(pickled_packet) + + # Test that unpickled packet mapper works + assert callable(unpickled_packet) + + def test_utility_functions_with_complex_streams(self, sample_stream): + """Test utility functions with complex 
streams from fixtures."""
+        # Test tag() with sample stream
+        tag_mapper = tag({"file_name": "filename"}, drop_unmapped=False)
+        transformed_stream = tag_mapper(sample_stream)
+
+        results = list(transformed_stream)
+
+        for result_tag, _ in results:
+            assert "filename" in result_tag
+            assert result_tag["filename"] in ["day1", "day2", "day3"]
+            assert "session" in result_tag  # Kept because drop_unmapped=False
+
+        # Test packet() with sample stream
+        packet_mapper = packet({"txt_file": "text_file"}, drop_unmapped=False)
+        transformed_stream = packet_mapper(sample_stream)
+
+        results = list(transformed_stream)
+
+        for _, result_packet in results:
+            assert "text_file" in result_packet
+            assert "data" in result_packet["text_file"]
+            assert "metadata" in result_packet  # Kept because drop_unmapped=False
diff --git a/tests/test_streams_operations/test_pipelines/__init__.py b/tests/test_streams_operations/test_pipelines/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py b/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py
new file mode 100644
index 0000000..75784a7
--- /dev/null
+++ b/tests/test_streams_operations/test_pipelines/test_basic_pipelines.py
@@ -0,0 +1,542 @@
+"""
+Test module for basic pipeline operations.
+
+This module tests fundamental pipeline construction and execution,
+including chaining operations, combining multiple streams, and
+basic data flow patterns as demonstrated in the notebooks.
+"""
+
+import pytest
+import tempfile
+from pathlib import Path
+
+from orcabridge.base import SyncStream
+from orcabridge.streams import SyncStreamFromLists
+from orcabridge.mappers import (
+    Join,
+    Merge,
+    Filter,
+    Transform,
+    MapPackets,
+    MapTags,
+    Repeat,
+    DefaultTag,
+    Batch,
+    FirstMatch,
+)
+from orcabridge.sources import GlobSource
+from orcabridge.pod import FunctionPod
+
+
+@pytest.fixture
+def temp_files():
+    """Create temporary files for testing."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        temp_path = Path(temp_dir)
+
+        # Create test files
+        files = {}
+        for i in range(1, 4):
+            file_path = temp_path / f"test_{i}.txt"
+            content = f"Content of file {i}\nLine 2 of file {i}"
+            with open(file_path, "w") as f:
+                f.write(content)
+            files[f"test_{i}.txt"] = file_path
+
+        yield temp_path, files
+
+
+@pytest.fixture
+def sample_user_data():
+    """Sample user data for pipeline testing."""
+    return [
+        ({"user_id": 1, "session": "a"}, {"name": "Alice", "age": 25, "score": 85}),
+        ({"user_id": 2, "session": "a"}, {"name": "Bob", "age": 30, "score": 92}),
+        ({"user_id": 3, "session": "b"}, {"name": "Charlie", "age": 28, "score": 78}),
+        ({"user_id": 1, "session": "b"}, {"name": "Alice", "age": 25, "score": 88}),
+    ]
+
+
+@pytest.fixture
+def sample_metadata():
+    """Sample metadata for joining."""
+    return [
+        ({"user_id": 1}, {"department": "Engineering", "level": "Senior"}),
+        ({"user_id": 2}, {"department": "Marketing", "level": "Junior"}),
+        ({"user_id": 3}, {"department": "Engineering", "level": "Mid"}),
+    ]
+
+
+class TestBasicPipelineConstruction:
+    """Test basic pipeline construction patterns."""
+
+    def test_simple_linear_pipeline(self, sample_user_data):
+        """Test simple linear pipeline with chained operations."""
+        tags, packets = zip(*sample_user_data)
+        source_stream = SyncStreamFromLists(list(tags), list(packets))
+
+        # Build pipeline: filter -> transform -> map packets
+        pipeline = (
+            source_stream
+            >> Filter(lambda tag, packet: packet["age"] >= 28)
+            >>
Transform( + lambda tag, packet: (tag, {**packet, "category": "experienced"}) + ) + >> MapPackets({"name": "full_name", "score": "performance"}) + ) + + result = list(pipeline) + + # Should have filtered out users under 28 + assert len(result) == 3 + + # Check transformations applied + for tag, packet in result: + assert packet["age"] >= 28 + assert packet["category"] == "experienced" + assert "full_name" in packet + assert "performance" in packet + assert "name" not in packet # Should be mapped + assert "score" not in packet # Should be mapped + + def test_pipeline_with_join(self, sample_user_data, sample_metadata): + """Test pipeline with join operation.""" + # Create streams + user_tags, user_packets = zip(*sample_user_data) + meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Join streams on user_id + joined = Join()(user_stream, meta_stream) + result = list(joined) + + # Should have joined records where user_id matches + assert len(result) >= 2 # At least Alice and Bob should match + + # Check that joined data has both user and metadata info + for tag, packet in result: + assert "user_id" in tag + assert "name" in packet # From user data + assert "department" in packet # From metadata + + def test_pipeline_with_merge(self, sample_user_data): + """Test pipeline with merge operation.""" + tags, packets = zip(*sample_user_data) + + # Split data into two streams + stream1 = SyncStreamFromLists(list(tags[:2]), list(packets[:2])) + stream2 = SyncStreamFromLists(list(tags[2:]), list(packets[2:])) + + # Merge streams + merged = Merge()(stream1, stream2) + result = list(merged) + + # Should have all items from both streams + assert len(result) == 4 + + # Order might be different but all data should be present + result_user_ids = [tag["user_id"] for tag, packet in result] + expected_user_ids = [tag["user_id"] for tag, packet in sample_user_data] + assert sorted(result_user_ids) == sorted(expected_user_ids) + + def test_pipeline_with_batch_processing(self, sample_user_data): + """Test pipeline with batch processing.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create batches of size 2 + batched = Batch(batch_size=2)(source_stream) + result = list(batched) + + # Should have 2 batches (4 items / 2 per batch) + assert len(result) == 2 + + # Each result should be a batch + for tag, packet in result: + assert isinstance(packet, list) + assert len(packet) == 2 + # Tag should be batch representation of individual tags + assert isinstance(tag, dict) + + def test_pipeline_with_repeat_operation(self, sample_user_data): + """Test pipeline with repeat operation.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists( + list(tags[:2]), list(packets[:2]) + ) # Use first 2 items + + # Repeat each item 3 times + repeated = Repeat(repeat_count=3)(source_stream) + result = list(repeated) + + # Should have 6 items total (2 original * 3 repeats) + assert len(result) == 6 + + # Check that items are correctly repeated + assert result[0] == result[1] == result[2] # First item repeated + assert result[3] == result[4] == result[5] # Second item repeated + + def test_complex_multi_stage_pipeline(self, sample_user_data, sample_metadata): + """Test complex pipeline with multiple stages and branches.""" + # Create source streams + user_tags, user_packets = zip(*sample_user_data) + 
meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Complex pipeline: + # 1. Add default tags to user stream + # 2. Join with metadata + # 3. Filter by age and score + # 4. Transform and map fields + pipeline = ( + DefaultTag({"source": "user_system"})(user_stream) + * meta_stream # Join operation + >> Filter(lambda tag, packet: packet["age"] >= 25 and packet["score"] >= 80) + >> Transform( + lambda tag, packet: ( + {**tag, "processed": True}, + {**packet, "grade": "A" if packet["score"] >= 90 else "B"}, + ) + ) + >> MapPackets({"name": "employee_name", "department": "dept"}) + ) + + result = list(pipeline) + + # Verify complex transformations + for tag, packet in result: + assert tag["source"] == "user_system" + assert tag["processed"] is True + assert packet["age"] >= 25 + assert packet["score"] >= 80 + assert packet["grade"] in ["A", "B"] + assert "employee_name" in packet + assert "dept" in packet + + def test_pipeline_error_propagation(self, sample_user_data): + """Test that errors propagate correctly through pipeline.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create pipeline with operation that will fail + def failing_transform(tag, packet): + if packet["age"] > 29: + raise ValueError("Age too high!") + return tag, packet + + pipeline = source_stream >> Transform(failing_transform) + + # Should propagate the error + with pytest.raises(ValueError, match="Age too high!"): + list(pipeline) + + def test_pipeline_with_empty_stream(self): + """Test pipeline behavior with empty streams.""" + empty_stream = SyncStreamFromLists([], []) + + # Apply operations to empty stream + pipeline = ( + empty_stream + >> Filter(lambda tag, packet: True) + >> Transform(lambda tag, packet: (tag, {**packet, "processed": True})) + ) + + result = list(pipeline) + assert result == [] + + def test_pipeline_with_first_match(self, sample_user_data, sample_metadata): + """Test pipeline with FirstMatch operation.""" + user_tags, user_packets = zip(*sample_user_data) + meta_tags, meta_packets = zip(*sample_metadata) + + user_stream = SyncStreamFromLists(list(user_tags), list(user_packets)) + meta_stream = SyncStreamFromLists(list(meta_tags), list(meta_packets)) + + # Use FirstMatch instead of Join + matched = FirstMatch()(user_stream, meta_stream) + result = list(matched) + + # FirstMatch should consume items from both streams + assert len(result) <= len(sample_user_data) + + # Each result should have matched data + for tag, packet in result: + assert "user_id" in tag + assert "name" in packet or "department" in packet + + +class TestPipelineDataFlow: + """Test data flow patterns in pipelines.""" + + def test_data_preservation_through_pipeline(self, sample_user_data): + """Test that data is correctly preserved through transformations.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Track original data + original_user_ids = [tag["user_id"] for tag, packet in sample_user_data] + original_names = [packet["name"] for tag, packet in sample_user_data] + + # Pipeline that shouldn't lose data + pipeline = ( + source_stream + >> MapTags({"user_id": "id"}) # Rename tag field + >> MapPackets({"name": "username"}) # Rename packet field + ) + + result = list(pipeline) + + # Check data preservation + result_ids = [tag["id"] for tag, packet in 
result] + result_names = [packet["username"] for tag, packet in result] + + assert sorted(result_ids) == sorted(original_user_ids) + assert sorted(result_names) == sorted(original_names) + + def test_data_aggregation_pipeline(self, sample_user_data): + """Test pipeline that aggregates data.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Aggregate by session + def aggregate_by_session(tag, packet): + return {"session": tag["session"]}, { + "users": [packet["name"]], + "avg_score": packet["score"], + "count": 1, + } + + # Transform and then batch by session (simplified aggregation) + pipeline = source_stream >> Transform(aggregate_by_session) + + result = list(pipeline) + + # Should have transformed all items + assert len(result) == len(sample_user_data) + + # Check session-based grouping + sessions = [tag["session"] for tag, packet in result] + assert "a" in sessions + assert "b" in sessions + + def test_conditional_processing_pipeline(self, sample_user_data): + """Test pipeline with conditional processing branches.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Split into high and low performers + high_performers = ( + source_stream + >> Filter(lambda tag, packet: packet["score"] >= 85) + >> Transform( + lambda tag, packet: ( + {**tag, "category": "high"}, + {**packet, "bonus": packet["score"] * 0.1}, + ) + ) + ) + + low_performers = ( + source_stream + >> Filter(lambda tag, packet: packet["score"] < 85) + >> Transform( + lambda tag, packet: ( + {**tag, "category": "low"}, + {**packet, "training": True}, + ) + ) + ) + + # Merge results + combined = Merge()(high_performers, low_performers) + result = list(combined) + + # Check that all items are categorized + categories = [tag["category"] for tag, packet in result] + assert "high" in categories + assert "low" in categories + + # Check conditional processing + for tag, packet in result: + if tag["category"] == "high": + assert "bonus" in packet + assert packet["score"] >= 85 + else: + assert "training" in packet + assert packet["score"] < 85 + + +class TestPipelineWithSources: + """Test pipelines starting from sources.""" + + def test_pipeline_from_glob_source(self, temp_files): + """Test pipeline starting from GlobSource.""" + temp_dir, files = temp_files + + # Create source + source = GlobSource(str(temp_dir / "*.txt")) + + # Build pipeline + pipeline = ( + source + >> Transform( + lambda tag, packet: ( + {**tag, "processed": True}, + {**packet, "line_count": len(packet["content"].split("\n"))}, + ) + ) + >> Filter(lambda tag, packet: packet["line_count"] >= 2) + ) + + result = list(pipeline) + + # Should have all files (each has 2 lines) + assert len(result) == 3 + + # Check processing + for tag, packet in result: + assert tag["processed"] is True + assert packet["line_count"] == 2 + assert "path" in tag + + def test_pipeline_with_function_pod(self, sample_user_data): + """Test pipeline with FunctionPod processing.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create processing function + def enrich_user_data(tag, packet): + """Add computed fields to user data.""" + return tag, { + **packet, + "age_group": "young" if packet["age"] < 30 else "mature", + "performance": "excellent" if packet["score"] >= 90 else "good", + } + + # Create pod + processor = FunctionPod(enrich_user_data) + + # Build pipeline + pipeline = ( + source_stream + >> 
processor + >> Filter(lambda tag, packet: packet["performance"] == "excellent") + ) + + result = list(pipeline) + + # Check processing + for tag, packet in result: + assert packet["performance"] == "excellent" + assert packet["age_group"] in ["young", "mature"] + assert packet["score"] >= 90 + + +class TestPipelineOptimization: + """Test pipeline optimization and efficiency.""" + + def test_pipeline_lazy_evaluation(self, sample_user_data): + """Test that pipeline operations are lazily evaluated.""" + call_log = [] + + def logging_transform(tag, packet): + call_log.append(f"processing_{tag['user_id']}") + return tag, packet + + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Build pipeline but don't execute + pipeline = ( + source_stream + >> Transform(logging_transform) + >> Filter(lambda tag, packet: packet["age"] >= 28) + ) + + # No processing should have happened yet + assert call_log == [] + + # Start consuming pipeline + iterator = iter(pipeline) + next(iterator) + + # Now some processing should have happened + assert len(call_log) >= 1 + + def test_pipeline_memory_efficiency(self): + """Test pipeline memory efficiency with large data.""" + + def large_data_generator(): + for i in range(1000): + yield ({"id": i}, {"value": i * 2, "data": f"item_{i}"}) + + # Create pipeline that processes large stream + from orcabridge.stream import SyncStreamFromGenerator + + source = SyncStreamFromGenerator(large_data_generator) + pipeline = ( + source + >> Filter(lambda tag, packet: tag["id"] % 10 == 0) # Keep every 10th item + >> Transform(lambda tag, packet: (tag, {**packet, "filtered": True})) + ) + + # Process in chunks + count = 0 + for tag, packet in pipeline: + assert packet["filtered"] is True + assert tag["id"] % 10 == 0 + count += 1 + + if count >= 10: # Don't process all items + break + + assert count == 10 + + def test_pipeline_error_recovery(self, sample_user_data): + """Test pipeline behavior with partial errors.""" + tags, packets = zip(*sample_user_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def sometimes_failing_transform(tag, packet): + if packet["name"] == "Bob": # Fail for Bob + raise ValueError("Bob processing failed") + return tag, {**packet, "processed": True} + + # This pipeline will fail partway through + pipeline = source_stream >> Transform(sometimes_failing_transform) + + # Should fail when reaching Bob + with pytest.raises(ValueError, match="Bob processing failed"): + list(pipeline) + + def test_pipeline_reusability(self, sample_user_data): + """Test that pipeline components can be reused.""" + # Create reusable operations + age_filter = Filter(lambda tag, packet: packet["age"] >= 28) + score_transform = Transform( + lambda tag, packet: ( + tag, + {**packet, "grade": "A" if packet["score"] >= 90 else "B"}, + ) + ) + + tags, packets = zip(*sample_user_data) + stream1 = SyncStreamFromLists(list(tags[:2]), list(packets[:2])) + stream2 = SyncStreamFromLists(list(tags[2:]), list(packets[2:])) + + # Apply same operations to different streams + pipeline1 = stream1 >> age_filter >> score_transform + pipeline2 = stream2 >> age_filter >> score_transform + + result1 = list(pipeline1) + result2 = list(pipeline2) + + # Both should work independently + for tag, packet in result1 + result2: + if len([tag, packet]) > 0: # If any results + assert packet["age"] >= 28 + assert packet["grade"] in ["A", "B"] diff --git a/tests/test_streams_operations/test_pipelines/test_recursive_features.py 
b/tests/test_streams_operations/test_pipelines/test_recursive_features.py new file mode 100644 index 0000000..2c6daa9 --- /dev/null +++ b/tests/test_streams_operations/test_pipelines/test_recursive_features.py @@ -0,0 +1,637 @@ +""" +Test module for recursive features and advanced pipeline patterns. + +This module tests advanced orcabridge features including recursive stream +operations, label chaining, length operations, source invocation patterns, +and complex pipeline compositions as demonstrated in the notebooks. +""" + +import pytest +import tempfile +from pathlib import Path +from unittest.mock import Mock, patch + +from orcabridge.base import SyncStream, Operation +from orcabridge.stream import SyncStreamFromLists, SyncStreamFromGenerator +from orcabridge.mapper import ( + Join, + Merge, + Filter, + Transform, + MapPackets, + MapTags, + Repeat, + DefaultTag, + Batch, + CacheStream, +) +from orcabridge.sources import GlobSource +from orcabridge.pod import FunctionPod + + +@pytest.fixture +def hierarchical_data(): + """Hierarchical data for testing recursive operations.""" + return [ + ( + {"level": 1, "parent": None, "id": "root"}, + {"name": "Root", "children": ["a", "b"]}, + ), + ( + {"level": 2, "parent": "root", "id": "a"}, + {"name": "Node A", "children": ["a1", "a2"]}, + ), + ( + {"level": 2, "parent": "root", "id": "b"}, + {"name": "Node B", "children": ["b1"]}, + ), + ({"level": 3, "parent": "a", "id": "a1"}, {"name": "Leaf A1", "children": []}), + ({"level": 3, "parent": "a", "id": "a2"}, {"name": "Leaf A2", "children": []}), + ({"level": 3, "parent": "b", "id": "b1"}, {"name": "Leaf B1", "children": []}), + ] + + +@pytest.fixture +def temp_nested_files(): + """Create nested file structure for testing.""" + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + + # Create nested directory structure + (temp_path / "level1").mkdir() + (temp_path / "level1" / "level2").mkdir() + + files = {} + + # Root level files + for i in range(3): + file_path = temp_path / f"root_{i}.txt" + with open(file_path, "w") as f: + f.write(f"Root file {i}") + files[f"root_{i}"] = file_path + + # Level 1 files + for i in range(2): + file_path = temp_path / "level1" / f"l1_{i}.txt" + with open(file_path, "w") as f: + f.write(f"Level 1 file {i}") + files[f"l1_{i}"] = file_path + + # Level 2 files + file_path = temp_path / "level1" / "level2" / "l2_0.txt" + with open(file_path, "w") as f: + f.write("Level 2 file") + files["l2_0"] = file_path + + yield temp_path, files + + +class TestRecursiveStreamOperations: + """Test recursive and self-referential stream operations.""" + + def test_recursive_stream_processing(self, hierarchical_data): + """Test recursive processing of hierarchical data.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def process_level(stream, max_level=3): + """Recursively process each level.""" + + def level_processor(tag, packet): + level = tag["level"] + if level < max_level: + # Add processing marker + return tag, {**packet, f"processed_level_{level}": True} + else: + # Leaf nodes get different processing + return tag, {**packet, "is_leaf": True} + + return Transform(level_processor)(stream) + + # Apply recursive processing + processed = process_level(source_stream) + result = list(processed) + + # Check that different levels are processed differently + for tag, packet in result: + level = tag["level"] + if level < 3: + assert f"processed_level_{level}" in packet + else: + assert 
packet["is_leaf"] is True + + def test_recursive_stream_expansion(self, hierarchical_data): + """Test recursive expansion of stream data.""" + # Start with root nodes only + root_data = [item for item in hierarchical_data if item[0]["parent"] is None] + tags, packets = zip(*root_data) + root_stream = SyncStreamFromLists(list(tags), list(packets)) + + def expand_children(tag, packet): + """Generate child nodes for each parent.""" + children = packet.get("children", []) + for child_id in children: + # Find child data from hierarchical_data + for h_tag, h_packet in hierarchical_data: + if h_tag["id"] == child_id: + yield h_tag, h_packet + break + + # Create expanding pod + expander = FunctionPod(expand_children) + expanded = expander(root_stream) + result = list(expanded) + + # Should have expanded to include all children + assert len(result) >= 2 # At least the immediate children + + def test_recursive_filtering_cascade(self, hierarchical_data): + """Test recursive filtering that cascades through levels.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create a cascade of filters for each level + level_1_filter = Filter(lambda tag, packet: tag["level"] == 1) + level_2_filter = Filter(lambda tag, packet: tag["level"] <= 2) + level_3_filter = Filter(lambda tag, packet: tag["level"] <= 3) + + # Apply filters recursively + def recursive_filter(stream, current_level=1): + if current_level == 1: + filtered = level_1_filter(stream) + elif current_level == 2: + filtered = level_2_filter(stream) + else: + filtered = level_3_filter(stream) + + return filtered + + # Test each level + level_1_result = list(recursive_filter(source_stream, 1)) + level_2_result = list(recursive_filter(source_stream, 2)) + level_3_result = list(recursive_filter(source_stream, 3)) + + assert len(level_1_result) == 1 # Only root + assert len(level_2_result) == 3 # Root + level 2 nodes + assert len(level_3_result) == 6 # All nodes + + def test_self_referential_stream_operations(self, hierarchical_data): + """Test operations that reference the stream itself.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Cache the stream for self-reference + cache = CacheStream() + cached_stream = cache(source_stream) + + # Consume the cache + list(cached_stream) + + # Now create operations that reference the cached data + def find_parent_info(tag, packet): + parent_id = tag.get("parent") + if parent_id: + # Look up parent in cached stream + for cached_tag, cached_packet in cache.cache: + if cached_tag["id"] == parent_id: + return tag, { + **packet, + "parent_name": cached_packet["name"], + "parent_level": cached_tag["level"], + } + return tag, {**packet, "parent_name": None, "parent_level": None} + + # Apply parent lookup + enriched = Transform(find_parent_info)(cached_stream) + result = list(enriched) + + # Check parent information was added + for tag, packet in result: + if tag["parent"] is not None: + assert packet["parent_name"] is not None + assert packet["parent_level"] is not None + + +class TestLabelAndLengthOperations: + """Test label manipulation and length operations.""" + + def test_label_chaining_operations(self, hierarchical_data): + """Test chaining operations with label tracking.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists( + list(tags), list(packets), label="hierarchical_source" + ) + + # Create labeled operations + filter_op = Filter(lambda tag, packet: 
tag["level"] <= 2) + transform_op = Transform( + lambda tag, packet: (tag, {**packet, "processed": True}) + ) + + # Apply operations and track labels + filtered = filter_op(source_stream) + assert filtered.label.startswith("Filter_") + + transformed = transform_op(filtered) + assert transformed.label.startswith("Transform_") + + # Check that invocation chain is maintained + result = list(transformed) + assert len(result) == 3 # Root + 2 level-2 nodes + + def test_stream_length_operations(self, hierarchical_data): + """Test operations that depend on stream length.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def length_dependent_transform(tag, packet): + # This would need to know stream length + # For simulation, we'll use a mock length + stream_length = 6 # Known length of hierarchical_data + return tag, { + **packet, + "relative_position": tag["level"] / 3, # Relative to max level + "is_majority_level": tag["level"] == 3, # Most nodes are level 3 + } + + processed = Transform(length_dependent_transform)(source_stream) + result = list(processed) + + # Check length-dependent calculations + for tag, packet in result: + assert "relative_position" in packet + assert "is_majority_level" in packet + if tag["level"] == 3: + assert packet["is_majority_level"] is True + + def test_dynamic_label_generation(self, hierarchical_data): + """Test dynamic label generation based on stream content.""" + tags, packets = zip(*hierarchical_data) + + # Create streams with content-based labels + def create_labeled_stream(data, label_func): + stream_tags, stream_packets = zip(*data) + label = label_func(data) + return SyncStreamFromLists( + list(stream_tags), list(stream_packets), label=label + ) + + # Different labeling strategies + level_1_data = [item for item in hierarchical_data if item[0]["level"] == 1] + level_2_data = [item for item in hierarchical_data if item[0]["level"] == 2] + level_3_data = [item for item in hierarchical_data if item[0]["level"] == 3] + + stream_1 = create_labeled_stream( + level_1_data, lambda data: f"level_1_stream_{len(data)}_items" + ) + stream_2 = create_labeled_stream( + level_2_data, lambda data: f"level_2_stream_{len(data)}_items" + ) + stream_3 = create_labeled_stream( + level_3_data, lambda data: f"level_3_stream_{len(data)}_items" + ) + + assert stream_1.label == "level_1_stream_1_items" + assert stream_2.label == "level_2_stream_2_items" + assert stream_3.label == "level_3_stream_3_items" + + +class TestSourceInvocationPatterns: + """Test advanced source invocation and composition patterns.""" + + def test_multiple_source_composition(self, temp_nested_files): + """Test composing multiple sources with different patterns.""" + temp_path, files = temp_nested_files + + # Create different sources for different levels + root_source = GlobSource(str(temp_path / "*.txt"), label="root_files") + level1_source = GlobSource( + str(temp_path / "level1" / "*.txt"), label="level1_files" + ) + level2_source = GlobSource( + str(temp_path / "level1" / "level2" / "*.txt"), label="level2_files" + ) + + # Compose sources + all_sources = Merge()(root_source, level1_source, level2_source) + result = list(all_sources) + + # Should have files from all levels + assert len(result) >= 6 # 3 root + 2 level1 + 1 level2 + + # Check that files from different levels are included + paths = [tag["path"] for tag, packet in result] + assert any("root_" in str(path) for path in paths) + assert any("l1_" in str(path) for path in paths) + 
assert any("l2_" in str(path) for path in paths) + + def test_conditional_source_invocation(self, temp_nested_files): + """Test conditional source invocation based on data content.""" + temp_path, files = temp_nested_files + + def conditional_source_factory(condition): + """Create source based on condition.""" + if condition == "root": + return GlobSource(str(temp_path / "*.txt")) + elif condition == "nested": + return GlobSource(str(temp_path / "**" / "*.txt")) + else: + return SyncStreamFromLists([], []) # Empty stream + + # Test different conditions + root_stream = conditional_source_factory("root") + nested_stream = conditional_source_factory("nested") + empty_stream = conditional_source_factory("other") + + root_result = list(root_stream) + nested_result = list(nested_stream) + empty_result = list(empty_stream) + + assert len(root_result) == 3 # Only root files + assert len(nested_result) >= 6 # All files recursively + assert len(empty_result) == 0 + + def test_recursive_source_generation(self, temp_nested_files): + """Test recursive generation of sources.""" + temp_path, files = temp_nested_files + + def recursive_file_processor(tag, packet): + """Process file and potentially generate more sources.""" + file_path = Path(tag["path"]) + + # If this is a directory-like file, yield info about subdirectories + if "level1" in str(file_path.parent): + # This file is in level1, so it knows about level2 + yield tag, {**packet, "has_subdirs": True, "subdir_count": 1} + else: + yield tag, {**packet, "has_subdirs": False, "subdir_count": 0} + + # Start with root source + root_source = GlobSource(str(temp_path / "*.txt")) + + # Apply recursive processing + processor = FunctionPod(recursive_file_processor) + processed = processor(root_source) + result = list(processed) + + # Check recursive information + for tag, packet in result: + assert "has_subdirs" in packet + assert "subdir_count" in packet + + def test_source_caching_and_reuse(self, temp_nested_files): + """Test caching and reusing source results.""" + temp_path, files = temp_nested_files + + # Create cached source + source = GlobSource(str(temp_path / "*.txt")) + cache = CacheStream() + cached_source = cache(source) + + # First consumption + result1 = list(cached_source) + + # Verify caching worked + assert cache.is_cached + assert len(cache.cache) == 3 + + # Create new operations using cached source + filter_op = Filter(lambda tag, packet: "root_1" in str(tag["path"])) + transform_op = Transform(lambda tag, packet: (tag, {**packet, "reused": True})) + + # Apply operations to cached source + filtered = filter_op(cache()) # Use cached version + transformed = transform_op(cache()) # Use cached version again + + filter_result = list(filtered) + transform_result = list(transformed) + + # Both should work independently using cached data + assert len(filter_result) == 1 # Only root_1 file + assert len(transform_result) == 3 # All files with reused flag + + for tag, packet in transform_result: + assert packet["reused"] is True + + +class TestComplexPipelinePatterns: + """Test complex pipeline patterns and compositions.""" + + def test_branching_and_merging_pipeline(self, hierarchical_data): + """Test pipeline that branches and merges back together.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create branches for different processing paths + branch_a = ( + source_stream + >> Filter(lambda tag, packet: tag["level"] <= 2) + >> Transform( + lambda tag, packet: (tag, {**packet, 
"branch": "A", "priority": "high"}) + ) + ) + + branch_b = ( + source_stream + >> Filter(lambda tag, packet: tag["level"] == 3) + >> Transform( + lambda tag, packet: (tag, {**packet, "branch": "B", "priority": "low"}) + ) + ) + + # Merge branches back together + merged = Merge()(branch_a, branch_b) + result = list(merged) + + # Should have all original items but with branch processing + assert len(result) == 6 + + # Check branch assignments + branches = [packet["branch"] for tag, packet in result] + assert "A" in branches + assert "B" in branches + + def test_multi_level_pipeline_composition(self, hierarchical_data): + """Test multi-level pipeline composition with nested operations.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Level 1: Basic filtering and transformation + level1_pipeline = ( + source_stream + >> Filter(lambda tag, packet: len(packet["name"]) > 5) + >> Transform( + lambda tag, packet: (tag, {**packet, "level1_processed": True}) + ) + ) + + # Level 2: Advanced processing based on level 1 + level2_pipeline = ( + level1_pipeline + >> MapTags({"level": "hierarchy_level", "id": "node_id"}) + >> MapPackets({"name": "node_name", "children": "child_nodes"}) + ) + + # Level 3: Final aggregation and summary + level3_pipeline = level2_pipeline >> Transform( + lambda tag, packet: ( + tag, + { + **packet, + "final_processed": True, + "child_count": len(packet["child_nodes"]), + "has_children": len(packet["child_nodes"]) > 0, + }, + ) + ) + + result = list(level3_pipeline) + + # Check multi-level processing + for tag, packet in result: + assert packet["level1_processed"] is True + assert packet["final_processed"] is True + assert "hierarchy_level" in tag + assert "node_name" in packet + assert "child_count" in packet + + def test_pipeline_with_feedback_loop(self, hierarchical_data): + """Test pipeline pattern that simulates feedback loops.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + # Create a cache to simulate feedback + feedback_cache = CacheStream() + + # First pass: process and cache + first_pass = ( + source_stream + >> Transform(lambda tag, packet: (tag, {**packet, "pass": 1})) + >> feedback_cache + ) + + # Consume first pass to populate cache + first_result = list(first_pass) + + # Second pass: use cached data for enrichment + def enrich_with_feedback(tag, packet): + # Use cached data to enrich current item + related_items = [] + for cached_tag, cached_packet in feedback_cache.cache: + if ( + cached_tag["level"] == tag["level"] + and cached_tag["id"] != tag["id"] + ): + related_items.append(cached_packet["name"]) + + return tag, { + **packet, + "pass": 2, + "related_items": related_items, + "relation_count": len(related_items), + } + + second_pass = Transform(enrich_with_feedback)(feedback_cache()) + second_result = list(second_pass) + + # Check feedback enrichment + for tag, packet in second_result: + assert packet["pass"] == 2 + assert "related_items" in packet + assert "relation_count" in packet + + def test_pipeline_error_handling_and_recovery(self, hierarchical_data): + """Test pipeline error handling and recovery patterns.""" + tags, packets = zip(*hierarchical_data) + source_stream = SyncStreamFromLists(list(tags), list(packets)) + + def potentially_failing_operation(tag, packet): + # Fail on specific condition + if tag["id"] == "a1": # Fail on specific node + raise ValueError("Processing failed for a1") + return tag, {**packet, 
"processed": True} + + # Create error-tolerant pipeline + def error_tolerant_transform(tag, packet): + try: + return potentially_failing_operation(tag, packet) + except ValueError: + # Recovery: mark as failed but continue + return tag, {**packet, "processed": False, "error": True} + + pipeline = Transform(error_tolerant_transform)(source_stream) + result = list(pipeline) + + # Should have processed all items despite error + assert len(result) == 6 + + # Check error handling + failed_items = [ + item for tag, packet in result for item in [packet] if packet.get("error") + ] + successful_items = [ + item + for tag, packet in result + for item in [packet] + if packet.get("processed") + ] + + assert len(failed_items) == 1 # One failed item + assert len(successful_items) == 5 # Five successful items + + def test_dynamic_pipeline_construction(self, hierarchical_data): + """Test dynamic construction of pipelines based on data characteristics.""" + tags, packets = zip(*hierarchical_data) + + def build_dynamic_pipeline(data): + """Build pipeline based on data characteristics.""" + # Analyze data + levels = set(tag["level"] for tag, packet in data) + max_level = max(levels) + has_children = any(len(packet["children"]) > 0 for tag, packet in data) + + # Build pipeline dynamically + base_stream = SyncStreamFromLists( + [tag for tag, packet in data], [packet for tag, packet in data] + ) + + operations = [base_stream] + + # Add level-specific processing + if max_level > 2: + operations.append( + Transform( + lambda tag, packet: (tag, {**packet, "is_deep_hierarchy": True}) + ) + ) + + # Add child processing if needed + if has_children: + operations.append( + Transform( + lambda tag, packet: ( + tag, + { + **packet, + "child_info": f"has_{len(packet['children'])}_children", + }, + ) + ) + ) + + # Chain operations + pipeline = operations[0] + for op in operations[1:]: + if isinstance(op, Transform): + pipeline = op(pipeline) + + return pipeline + + # Build and execute dynamic pipeline + dynamic_pipeline = build_dynamic_pipeline(hierarchical_data) + result = list(dynamic_pipeline) + + # Check dynamic processing + for tag, packet in result: + assert "is_deep_hierarchy" in packet # Should be added due to max_level > 2 + assert "child_info" in packet # Should be added due to has_children diff --git a/tests/test_streams_operations/test_pods/__init__.py b/tests/test_streams_operations/test_pods/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_pods/test_function_pod.py b/tests/test_streams_operations/test_pods/test_function_pod.py new file mode 100644 index 0000000..1b1f0a8 --- /dev/null +++ b/tests/test_streams_operations/test_pods/test_function_pod.py @@ -0,0 +1,305 @@ +"""Tests for FunctionPod functionality.""" + +import pytest +from orcabridge.pod import FunctionPod +from orcabridge.stream import SyncStreamFromLists + + +class TestFunctionPod: + """Test cases for FunctionPod.""" + + def test_function_pod_no_output(self, sample_stream, func_no_output): + """Test function pod with function that has no output.""" + pod = FunctionPod(func_no_output) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce no output + assert len(result) == 0 + + def test_function_pod_single_output(self, sample_stream, func_single_output): + """Test function pod with function that has single output.""" + pod = FunctionPod(func_single_output) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce one output 
per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) + + for i, (packet, tag) in enumerate(result): + expected_packet = f"processed_{original_packets[i][0]}" + assert packet == expected_packet + + def test_function_pod_multiple_outputs(self, sample_stream, func_multiple_outputs): + """Test function pod with function that has multiple outputs.""" + pod = FunctionPod(func_multiple_outputs) + result_stream = pod(sample_stream) + + result = list(result_stream) + + # Should produce two outputs per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) * 2 + + # Check that we get pairs of outputs + for i in range(0, len(result), 2): + original_idx = i // 2 + original_packet = original_packets[original_idx][0] + + # First output should be the packet itself + assert result[i][0] == original_packet + # Second output should be uppercased + assert result[i + 1][0] == str(original_packet).upper() + + def test_function_pod_error_function(self, sample_stream, func_with_error): + """Test function pod with function that raises error.""" + pod = FunctionPod(func_with_error) + result_stream = pod(sample_stream) + + # Should raise error when processing + with pytest.raises(ValueError, match="Function error"): + list(result_stream) + + def test_function_pod_with_datastore(self, func_single_output, data_store): + """Test function pod with datastore integration.""" + + # Create a function that uses the datastore + def datastore_function(inputs, datastore): + packet, tag = inputs[0] + # Store and retrieve from datastore + datastore["processed_count"] = datastore.get("processed_count", 0) + 1 + return f"item_{datastore['processed_count']}_{packet}" + + pod = FunctionPod(datastore_function, datastore=data_store) + + packets = ["a", "b", "c"] + tags = ["tag1", "tag2", "tag3"] + stream = SyncStreamFromLists(packets, tags) + + result_stream = pod(stream) + result = list(result_stream) + + # Should use datastore to track processing + expected = [("item_1_a", "tag1"), ("item_2_b", "tag2"), ("item_3_c", "tag3")] + assert result == expected + assert data_store["processed_count"] == 3 + + def test_function_pod_different_input_counts(self): + """Test function pod with functions expecting different input counts.""" + + # Function expecting 1 input + def single_input_func(inputs): + packet, tag = inputs[0] + return f"single_{packet}" + + # Function expecting 2 inputs + def double_input_func(inputs): + if len(inputs) < 2: + return None # Not enough inputs + packet1, tag1 = inputs[0] + packet2, tag2 = inputs[1] + return f"combined_{packet1}_{packet2}" + + packets = ["a", "b", "c", "d"] + tags = ["t1", "t2", "t3", "t4"] + stream = SyncStreamFromLists(packets, tags) + + # Test single input function + pod1 = FunctionPod(single_input_func) + result1 = list(pod1(stream)) + + assert len(result1) == 4 + assert result1[0][0] == "single_a" + assert result1[1][0] == "single_b" + + # Test double input function (if supported) + # This behavior depends on FunctionPod implementation + try: + pod2 = FunctionPod(double_input_func, input_count=2) + stream2 = SyncStreamFromLists(packets, tags) + result2 = list(pod2(stream2)) + + # Should produce fewer outputs since it needs 2 inputs per call + assert len(result2) <= len(packets) + + except (TypeError, AttributeError): + # FunctionPod might not support configurable input counts + pass + + def test_function_pod_with_none_outputs(self, sample_stream): + """Test function pod with function that sometimes returns 
None.""" + + def conditional_function(inputs): + packet, tag = inputs[0] + # Only process strings + if isinstance(packet, str): + return f"processed_{packet}" + return None # Skip non-strings + + # Mix of string and non-string packets + packets = ["hello", 42, "world", None, "test"] + tags = ["str1", "int1", "str2", "null1", "str3"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(conditional_function) + result_stream = pod(stream) + result = list(result_stream) + + # Should only process string packets + string_packets = [p for p in packets if isinstance(p, str)] + assert len(result) == len(string_packets) + + for packet, _ in result: + assert packet.startswith("processed_") + + def test_function_pod_stateful_function(self, data_store): + """Test function pod with stateful function using datastore.""" + + def stateful_function(inputs, datastore): + packet, tag = inputs[0] + + # Keep running total + if "total" not in datastore: + datastore["total"] = 0 + if "count" not in datastore: + datastore["count"] = 0 + + if isinstance(packet, (int, float)): + datastore["total"] += packet + datastore["count"] += 1 + avg = datastore["total"] / datastore["count"] + return f"avg_so_far_{avg:.2f}" + + return None + + packets = [10, 20, 30, 40] + tags = ["n1", "n2", "n3", "n4"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(stateful_function, datastore=data_store) + result_stream = pod(stream) + result = list(result_stream) + + # Should produce running averages + assert len(result) == 4 + assert result[0][0] == "avg_so_far_10.00" # 10/1 + assert result[1][0] == "avg_so_far_15.00" # (10+20)/2 + assert result[2][0] == "avg_so_far_20.00" # (10+20+30)/3 + assert result[3][0] == "avg_so_far_25.00" # (10+20+30+40)/4 + + def test_function_pod_generator_output(self, sample_stream): + """Test function pod with function that yields multiple outputs.""" + + def generator_function(inputs): + packet, tag = inputs[0] + # Yield multiple outputs for each input + for i in range(3): + yield f"{packet}_part_{i}" + + pod = FunctionPod(generator_function) + result_stream = pod(sample_stream) + result = list(result_stream) + + # Should produce 3 outputs per input + original_packets = list(sample_stream) + assert len(result) == len(original_packets) * 3 + + # Check pattern of outputs + for i, (packet, tag) in enumerate(result): + original_idx = i // 3 + part_idx = i % 3 + original_packet = original_packets[original_idx][0] + expected_packet = f"{original_packet}_part_{part_idx}" + assert packet == expected_packet + + def test_function_pod_complex_data_transformation(self): + """Test function pod with complex data transformation.""" + + def json_processor(inputs): + packet, tag = inputs[0] + + if isinstance(packet, dict): + # Extract all values and create separate outputs + for key, value in packet.items(): + yield f"{key}={value}" + else: + yield f"non_dict_{packet}" + + packets = [ + {"name": "Alice", "age": 30}, + "simple_string", + {"x": 1, "y": 2, "z": 3}, + ] + tags = ["person", "text", "coordinates"] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(json_processor) + result_stream = pod(stream) + result = list(result_stream) + + # Should extract dict entries + result_packets = [packet for packet, _ in result] + + assert "name=Alice" in result_packets + assert "age=30" in result_packets + assert "non_dict_simple_string" in result_packets + assert "x=1" in result_packets + assert "y=2" in result_packets + assert "z=3" in result_packets + + def 
test_function_pod_empty_stream(self, func_single_output): + """Test function pod with empty stream.""" + empty_stream = SyncStreamFromLists([], []) + pod = FunctionPod(func_single_output) + result_stream = pod(empty_stream) + + result = list(result_stream) + assert len(result) == 0 + + def test_function_pod_large_stream(self, func_single_output): + """Test function pod with large stream.""" + packets = [f"packet_{i}" for i in range(1000)] + tags = [f"tag_{i}" for i in range(1000)] + stream = SyncStreamFromLists(packets, tags) + + pod = FunctionPod(func_single_output) + result_stream = pod(stream) + + # Process stream lazily to test memory efficiency + count = 0 + for packet, tag in result_stream: + count += 1 + if count == 100: # Stop early + break + + assert count == 100 + + def test_function_pod_chaining(self, func_single_output): + """Test chaining function pods.""" + + def second_processor(inputs): + packet, tag = inputs[0] + return f"second_{packet}" + + packets = ["a", "b", "c"] + tags = ["t1", "t2", "t3"] + stream = SyncStreamFromLists(packets, tags) + + # Chain two function pods + pod1 = FunctionPod(func_single_output) + pod2 = FunctionPod(second_processor) + + intermediate_stream = pod1(stream) + final_stream = pod2(intermediate_stream) + result = list(final_stream) + + # Should apply both transformations + expected = [ + ("second_processed_a", "t1"), + ("second_processed_b", "t2"), + ("second_processed_c", "t3"), + ] + assert result == expected diff --git a/tests/test_streams_operations/test_pods/test_function_pod_datastore.py b/tests/test_streams_operations/test_pods/test_function_pod_datastore.py new file mode 100644 index 0000000..e3a2fa4 --- /dev/null +++ b/tests/test_streams_operations/test_pods/test_function_pod_datastore.py @@ -0,0 +1,403 @@ +""" +Test module for FunctionPod datastore integration. + +This module tests FunctionPod functionality when working with datastore operations, +including storage, retrieval, and state management across pod invocations. 
+"""
+
+import tempfile
+from pathlib import Path
+
+import pytest
+
+from orcabridge.pod import FunctionPod
+from orcabridge.streams import SyncStreamFromLists
+
+
+@pytest.fixture
+def temp_datastore():
+    """Create a temporary datastore directory."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        yield Path(temp_dir)
+
+
+@pytest.fixture
+def sample_stream_data():
+    """Sample stream data for testing."""
+    return [
+        ({"file_id": 1}, {"content": "Hello World", "metadata": {"type": "text"}}),
+        ({"file_id": 2}, {"content": "Python Code", "metadata": {"type": "code"}}),
+        (
+            {"file_id": 3},
+            {"content": "Data Analysis", "metadata": {"type": "analysis"}},
+        ),
+    ]
+
+
+@pytest.fixture
+def sample_stream(sample_stream_data):
+    """Create a sample stream."""
+    tags, packets = zip(*sample_stream_data)
+    return SyncStreamFromLists(list(tags), list(packets))
+
+
+class TestFunctionPodDatastore:
+    """Test cases for FunctionPod datastore integration."""
+
+    def test_datastore_saving_function(self, temp_datastore, sample_stream, sample_stream_data):
+        """Test FunctionPod with function that saves data to datastore."""
+
+        def save_to_datastore(tag, packet, datastore):
+            """Save packet content to datastore."""
+            file_id = tag["file_id"]
+            content = packet["content"]
+
+            # Create file path
+            file_path = datastore / f"file_{file_id}.txt"
+
+            # Save content
+            with open(file_path, "w") as f:
+                f.write(content)
+
+            # Return tag and packet with file path
+            return tag, {**packet, "saved_path": str(file_path)}
+
+        # Create pod with datastore
+        pod = FunctionPod(save_to_datastore, datastore=temp_datastore)
+
+        # Process stream
+        result_stream = pod(sample_stream)
+        result = list(result_stream)
+
+        # Check results
+        assert len(result) == 3
+
+        # Verify files were created
+        for i, (tag, packet) in enumerate(result, 1):
+            expected_path = temp_datastore / f"file_{i}.txt"
+            assert expected_path.exists()
+
+            # Verify content
+            with open(expected_path, "r") as f:
+                saved_content = f.read()
+
+            original_content = sample_stream_data[i - 1][1]["content"]
+            assert saved_content == original_content
+
+            # Verify packet contains path
+            assert "saved_path" in packet
+            assert packet["saved_path"] == str(expected_path)
+
+    def test_datastore_loading_function(self, temp_datastore):
+        """Test FunctionPod with function that loads data from datastore."""
+
+        # First, create some test files
+        test_files = {
+            "file1.txt": "Content of file 1",
+            "file2.txt": "Content of file 2",
+            "file3.txt": "Content of file 3",
+        }
+
+        for filename, content in test_files.items():
+            file_path = temp_datastore / filename
+            with open(file_path, "w") as f:
+                f.write(content)
+
+        def load_from_datastore(tag, packet, datastore):
+            """Load content from datastore based on filename in packet."""
+            filename = packet["filename"]
+            file_path = datastore / filename
+
+            if file_path.exists():
+                with open(file_path, "r") as f:
+                    content = f.read()
+                return tag, {**packet, "content": content, "loaded": True}
+            else:
+                return tag, {**packet, "content": None, "loaded": False}
+
+        # Create input stream with filenames
+        tags = [{"request_id": i} for i in range(1, 4)]
+        packets = [{"filename": f"file{i}.txt"} for i in range(1, 4)]
+        input_stream = SyncStreamFromLists(tags, packets)
+
+        # Create pod with datastore
+        pod = FunctionPod(load_from_datastore, datastore=temp_datastore)
+
+        # Process stream
+        result_stream = pod(input_stream)
+        result = list(result_stream)
+
+        # Check results
+        assert len(result) == 3
+
+        for i, (tag, packet) in enumerate(result):
+            assert 
packet["loaded"] is True + assert packet["content"] == f"Content of file {i + 1}" + assert packet["filename"] == f"file{i + 1}.txt" + + def test_datastore_with_stateful_operations(self, temp_datastore): + """Test FunctionPod with stateful operations using datastore.""" + + def stateful_counter(tag, packet, datastore): + """Maintain a counter in datastore across invocations.""" + counter_file = datastore / "counter.txt" + + # Read current counter value + if counter_file.exists(): + with open(counter_file, "r") as f: + count = int(f.read().strip()) + else: + count = 0 + + # Increment counter + count += 1 + + # Save new counter value + with open(counter_file, "w") as f: + f.write(str(count)) + + return tag, {**packet, "sequence_number": count} + + # Create multiple input streams to test state persistence + tags1 = [{"batch": 1, "item": i} for i in range(3)] + packets1 = [{"data": f"item_{i}"} for i in range(3)] + stream1 = SyncStreamFromLists(tags1, packets1) + + tags2 = [{"batch": 2, "item": i} for i in range(2)] + packets2 = [{"data": f"item_{i}"} for i in range(2)] + stream2 = SyncStreamFromLists(tags2, packets2) + + # Create pod with datastore + pod = FunctionPod(stateful_counter, datastore=temp_datastore) + + # Process first stream + result1 = list(pod(stream1)) + + # Process second stream (should continue counting) + result2 = list(pod(stream2)) + + # Check that counter state persisted across streams + expected_sequences1 = [1, 2, 3] + expected_sequences2 = [4, 5] + + for i, (tag, packet) in enumerate(result1): + assert packet["sequence_number"] == expected_sequences1[i] + + for i, (tag, packet) in enumerate(result2): + assert packet["sequence_number"] == expected_sequences2[i] + + def test_datastore_error_handling(self, temp_datastore): + """Test error handling when datastore operations fail.""" + + def failing_datastore_operation(tag, packet, datastore): + """Function that tries to access non-existent file.""" + nonexistent_file = datastore / "nonexistent.txt" + + # This should raise an exception + with open(nonexistent_file, "r") as f: + content = f.read() + + return tag, {**packet, "content": content} + + tags = [{"id": 1}] + packets = [{"data": "test"}] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(failing_datastore_operation, datastore=temp_datastore) + result_stream = pod(stream) + + # Should propagate the file not found error + with pytest.raises(FileNotFoundError): + list(result_stream) + + def test_datastore_with_subdirectories(self, temp_datastore): + """Test FunctionPod with datastore operations using subdirectories.""" + + def organize_by_type(tag, packet, datastore): + """Organize files by type in subdirectories.""" + file_type = packet["type"] + content = packet["content"] + file_id = tag["id"] + + # Create subdirectory + type_dir = datastore / file_type + type_dir.mkdir(exist_ok=True) + + # Save file in subdirectory + file_path = type_dir / f"{file_id}.txt" + with open(file_path, "w") as f: + f.write(content) + + return tag, {**packet, "organized_path": str(file_path)} + + # Create input with different types + tags = [{"id": f"file_{i}"} for i in range(4)] + packets = [ + {"type": "documents", "content": "Document content 1"}, + {"type": "images", "content": "Image metadata 1"}, + {"type": "documents", "content": "Document content 2"}, + {"type": "code", "content": "Python code"}, + ] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(organize_by_type, datastore=temp_datastore) + result = list(pod(stream)) + + # Check that 
subdirectories were created + assert (temp_datastore / "documents").exists() + assert (temp_datastore / "images").exists() + assert (temp_datastore / "code").exists() + + # Check that files were saved in correct subdirectories + assert (temp_datastore / "documents" / "file_0.txt").exists() + assert (temp_datastore / "images" / "file_1.txt").exists() + assert (temp_datastore / "documents" / "file_2.txt").exists() + assert (temp_datastore / "code" / "file_3.txt").exists() + + def test_datastore_without_datastore_param(self): + """Test that function without datastore parameter works normally.""" + + def simple_function(tag, packet): + """Function that doesn't use datastore.""" + return tag, {**packet, "processed": True} + + # This should work even though we don't provide datastore + pod = FunctionPod(simple_function) + + tags = [{"id": 1}] + packets = [{"data": "test"}] + stream = SyncStreamFromLists(tags, packets) + + result = list(pod(stream)) + assert len(result) == 1 + assert result[0][1]["processed"] is True + + def test_datastore_metadata_operations(self, temp_datastore): + """Test FunctionPod with metadata tracking in datastore.""" + + def track_processing_metadata(tag, packet, datastore): + """Track processing metadata for each item.""" + import time + import json + + item_id = tag["id"] + processing_time = time.time() + + # Create metadata entry + metadata = { + "item_id": item_id, + "processed_at": processing_time, + "original_data": packet["data"], + "processing_status": "completed", + } + + # Save metadata + metadata_file = datastore / f"metadata_{item_id}.json" + with open(metadata_file, "w") as f: + json.dump(metadata, f) + + return tag, {**packet, "metadata_file": str(metadata_file)} + + tags = [{"id": f"item_{i}"} for i in range(3)] + packets = [{"data": f"data_{i}"} for i in range(3)] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(track_processing_metadata, datastore=temp_datastore) + result = list(pod(stream)) + + # Check that metadata files were created + for i in range(3): + metadata_file = temp_datastore / f"metadata_item_{i}.json" + assert metadata_file.exists() + + # Verify metadata content + import json + + with open(metadata_file, "r") as f: + metadata = json.load(f) + + assert metadata["item_id"] == f"item_{i}" + assert metadata["original_data"] == f"data_{i}" + assert metadata["processing_status"] == "completed" + assert "processed_at" in metadata + + def test_datastore_with_generator_function(self, temp_datastore): + """Test FunctionPod with generator function that uses datastore.""" + + def split_and_save(tag, packet, datastore): + """Split content and save each part separately.""" + content = packet["content"] + parts = content.split() + base_id = tag["id"] + + for i, part in enumerate(parts): + part_id = f"{base_id}_part_{i}" + + # Save part to datastore + part_file = datastore / f"{part_id}.txt" + with open(part_file, "w") as f: + f.write(part) + + # Yield new tag-packet pair + new_tag = {**tag, "part_id": part_id, "part_index": i} + new_packet = {"part_content": part, "saved_to": str(part_file)} + yield new_tag, new_packet + + tags = [{"id": "doc1"}] + packets = [{"content": "Hello World Python Programming"}] + stream = SyncStreamFromLists(tags, packets) + + pod = FunctionPod(split_and_save, datastore=temp_datastore) + result = list(pod(stream)) + + # Should have 4 parts + assert len(result) == 4 + + expected_parts = ["Hello", "World", "Python", "Programming"] + for i, (tag, packet) in enumerate(result): + assert tag["part_index"] == i + 
assert packet["part_content"] == expected_parts[i]
+
+            # Check that file was saved
+            saved_file = Path(packet["saved_to"])
+            assert saved_file.exists()
+
+            with open(saved_file, "r") as f:
+                saved_content = f.read()
+            assert saved_content == expected_parts[i]
+
+    def test_datastore_path_validation(self, temp_datastore):
+        """Test that datastore path is properly validated and accessible."""
+
+        def check_datastore_access(tag, packet, datastore):
+            """Function that checks datastore accessibility."""
+            # Check if datastore is a Path object
+            assert isinstance(datastore, Path)
+
+            # Check if datastore directory exists and is writable
+            assert datastore.exists()
+            assert datastore.is_dir()
+
+            # Test writing and reading
+            test_file = datastore / "access_test.txt"
+            with open(test_file, "w") as f:
+                f.write("test")
+
+            with open(test_file, "r") as f:
+                content = f.read()
+
+            assert content == "test"
+
+            # Clean up
+            test_file.unlink()
+
+            return tag, {**packet, "datastore_accessible": True}
+
+        tags = [{"id": 1}]
+        packets = [{"data": "test"}]
+        stream = SyncStreamFromLists(tags, packets)
+
+        pod = FunctionPod(check_datastore_access, datastore=temp_datastore)
+        result = list(pod(stream))
+
+        assert result[0][1]["datastore_accessible"] is True
diff --git a/tests/test_streams_operations/test_pods/test_pod_base.py b/tests/test_streams_operations/test_pods/test_pod_base.py
new file mode 100644
index 0000000..8c79a9d
--- /dev/null
+++ b/tests/test_streams_operations/test_pods/test_pod_base.py
@@ -0,0 +1,274 @@
+"""Tests for base Pod functionality."""
+
+import pytest
+from orcabridge.pod import Pod
+from orcabridge.streams import SyncStreamFromLists
+
+
+class TestPodBase:
+    """Test cases for base Pod class."""
+
+    def test_pod_creation(self):
+        """Test basic pod creation."""
+        pod = Pod()
+        assert pod is not None
+
+    def test_pod_call_interface(self, sample_stream):
+        """Test that pod implements callable interface."""
+        pod = Pod()
+
+        # Base Pod should be callable, but might not do anything useful
+        # This tests the interface exists
+        try:
+            result_stream = pod(sample_stream)
+            # If it succeeds, result should be a stream
+            assert hasattr(result_stream, "__iter__")
+        except NotImplementedError:
+            # Base Pod might not implement __call__
+            pass
+
+    def test_pod_with_empty_stream(self):
+        """Test pod with empty stream."""
+        empty_stream = SyncStreamFromLists([], [])
+        pod = Pod()
+
+        try:
+            result_stream = pod(empty_stream)
+            result = list(result_stream)
+            # If implemented, should handle empty stream
+            assert isinstance(result, list)
+        except NotImplementedError:
+            # Base Pod might not implement functionality
+            pass
+
+    def test_pod_inheritance(self):
+        """Test that Pod can be inherited."""
+
+        class CustomPod(Pod):
+            def __call__(self, stream):
+                # Simple pass-through implementation
+                for packet, tag in stream:
+                    yield packet, tag
+
+        custom_pod = CustomPod()
+        packets = ["data1", "data2", "data3"]
+        tags = ["tag1", "tag2", "tag3"]
+
+        stream = SyncStreamFromLists(packets, tags)
+        result_stream = custom_pod(stream)
+        result = list(result_stream)
+
+        expected = list(zip(packets, tags))
+        assert result == expected
+
+    def test_pod_chaining(self):
+        """Test chaining pods together."""
+
+        class AddPrefixPod(Pod):
+            def __init__(self, prefix):
+                self.prefix = prefix
+
+            def __call__(self, stream):
+                for packet, tag in stream:
+                    yield f"{self.prefix}_{packet}", tag
+
+        class AddSuffixPod(Pod):
+            def __init__(self, suffix):
+                self.suffix = suffix
+
+            def __call__(self, stream):
+                for packet, tag in stream:
+                    
yield f"{packet}_{self.suffix}", tag + + packets = ["data1", "data2"] + tags = ["tag1", "tag2"] + stream = SyncStreamFromLists(packets, tags) + + # Chain two pods + prefix_pod = AddPrefixPod("PRE") + suffix_pod = AddSuffixPod("SUF") + + intermediate_stream = prefix_pod(stream) + final_stream = suffix_pod(intermediate_stream) + + result = list(final_stream) + + expected = [("PRE_data1_SUF", "tag1"), ("PRE_data2_SUF", "tag2")] + assert result == expected + + def test_pod_error_handling(self): + """Test pod error handling.""" + + class ErrorPod(Pod): + def __call__(self, stream): + for i, (packet, tag) in enumerate(stream): + if i == 1: # Error on second item + raise ValueError("Test error") + yield packet, tag + + packets = ["data1", "data2", "data3"] + tags = ["tag1", "tag2", "tag3"] + stream = SyncStreamFromLists(packets, tags) + + error_pod = ErrorPod() + result_stream = error_pod(stream) + + # Should raise error when processing second item + with pytest.raises(ValueError, match="Test error"): + list(result_stream) + + def test_pod_stateful_processing(self): + """Test pod with stateful processing.""" + + class CounterPod(Pod): + def __init__(self): + self.count = 0 + + def __call__(self, stream): + for packet, tag in stream: + self.count += 1 + yield (packet, self.count), tag + + packets = ["a", "b", "c"] + tags = ["t1", "t2", "t3"] + stream = SyncStreamFromLists(packets, tags) + + counter_pod = CounterPod() + result_stream = counter_pod(stream) + result = list(result_stream) + + expected = [(("a", 1), "t1"), (("b", 2), "t2"), (("c", 3), "t3")] + assert result == expected + + def test_pod_multiple_outputs_per_input(self): + """Test pod that produces multiple outputs per input.""" + + class DuplicatorPod(Pod): + def __call__(self, stream): + for packet, tag in stream: + yield f"{packet}_copy1", f"{tag}_1" + yield f"{packet}_copy2", f"{tag}_2" + + packets = ["data1", "data2"] + tags = ["tag1", "tag2"] + stream = SyncStreamFromLists(packets, tags) + + duplicator_pod = DuplicatorPod() + result_stream = duplicator_pod(stream) + result = list(result_stream) + + expected = [ + ("data1_copy1", "tag1_1"), + ("data1_copy2", "tag1_2"), + ("data2_copy1", "tag2_1"), + ("data2_copy2", "tag2_2"), + ] + assert result == expected + + def test_pod_filtering(self): + """Test pod that filters items.""" + + class FilterPod(Pod): + def __init__(self, predicate): + self.predicate = predicate + + def __call__(self, stream): + for packet, tag in stream: + if self.predicate(packet, tag): + yield packet, tag + + packets = [1, 2, 3, 4, 5] + tags = ["odd", "even", "odd", "even", "odd"] + stream = SyncStreamFromLists(packets, tags) + + # Filter for even numbers + def is_even(packet, tag): + return packet % 2 == 0 + + filter_pod = FilterPod(is_even) + result_stream = filter_pod(stream) + result = list(result_stream) + + expected = [(2, "even"), (4, "even")] + assert result == expected + + def test_pod_transformation(self): + """Test pod that transforms data.""" + + class TransformPod(Pod): + def __init__(self, transform_func): + self.transform_func = transform_func + + def __call__(self, stream): + for packet, tag in stream: + new_packet, new_tag = self.transform_func(packet, tag) + yield new_packet, new_tag + + packets = ["hello", "world"] + tags = ["greeting", "noun"] + stream = SyncStreamFromLists(packets, tags) + + def uppercase_transform(packet, tag): + return packet.upper(), tag.upper() + + transform_pod = TransformPod(uppercase_transform) + result_stream = transform_pod(stream) + result = list(result_stream) + + 
expected = [("HELLO", "GREETING"), ("WORLD", "NOUN")] + assert result == expected + + def test_pod_aggregation(self): + """Test pod that aggregates data.""" + + class SumPod(Pod): + def __call__(self, stream): + total = 0 + count = 0 + for packet, tag in stream: + if isinstance(packet, (int, float)): + total += packet + count += 1 + + if count > 0: + yield total, f"sum_of_{count}_items" + + packets = [1, 2, 3, 4, 5] + tags = ["n1", "n2", "n3", "n4", "n5"] + stream = SyncStreamFromLists(packets, tags) + + sum_pod = SumPod() + result_stream = sum_pod(stream) + result = list(result_stream) + + expected = [(15, "sum_of_5_items")] + assert result == expected + + def test_pod_with_complex_data(self): + """Test pod with complex data structures.""" + + class ExtractorPod(Pod): + def __call__(self, stream): + for packet, tag in stream: + if isinstance(packet, dict): + for key, value in packet.items(): + yield value, f"{tag}_{key}" + else: + yield packet, tag + + packets = [{"a": 1, "b": 2}, "simple_string", {"x": 10, "y": 20, "z": 30}] + tags = ["dict1", "str1", "dict2"] + stream = SyncStreamFromLists(packets, tags) + + extractor_pod = ExtractorPod() + result_stream = extractor_pod(stream) + result = list(result_stream) + + # Should extract dict values as separate items + assert len(result) == 6 # 2 + 1 + 3 + assert (1, "dict1_a") in result + assert (2, "dict1_b") in result + assert ("simple_string", "str1") in result + assert (10, "dict2_x") in result + assert (20, "dict2_y") in result + assert (30, "dict2_z") in result diff --git a/tests/test_streams_operations/test_sources/__init__.py b/tests/test_streams_operations/test_sources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_sources/test_glob_source.py b/tests/test_streams_operations/test_sources/test_glob_source.py new file mode 100644 index 0000000..62875a0 --- /dev/null +++ b/tests/test_streams_operations/test_sources/test_glob_source.py @@ -0,0 +1,325 @@ +"""Tests for GlobSource functionality.""" + +import pytest +import os +from pathlib import Path +from orcabridge.sources import GlobSource + + +class TestGlobSource: + """Test cases for GlobSource.""" + + def test_glob_source_basic(self, test_files, temp_dir): + """Test basic glob source functionality.""" + # Create a glob pattern for txt files + pattern = os.path.join(temp_dir, "*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all txt files + txt_files = [f for f in test_files if f.endswith(".txt")] + assert len(result) == len(txt_files) + + # Check that all found files are actual files + for file_content, file_path in result: + assert os.path.isfile(file_path) + assert file_path.endswith(".txt") + assert isinstance(file_content, str) # Text content + + def test_glob_source_specific_pattern(self, test_files, temp_dir): + """Test glob source with specific pattern.""" + # Look for files starting with "file1" + pattern = os.path.join(temp_dir, "file1*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find only file1.txt + assert len(result) == 1 + file_content, file_path = result[0] + assert "file1.txt" in file_path + assert file_content == "Content of file 1" + + def test_glob_source_binary_files(self, test_files, temp_dir): + """Test glob source with binary files.""" + # Look for binary files + pattern = os.path.join(temp_dir, "*.bin") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all 
binary files + bin_files = [f for f in test_files if f.endswith(".bin")] + assert len(result) == len(bin_files) + + for file_content, file_path in result: + assert file_path.endswith(".bin") + assert isinstance(file_content, bytes) # Binary content + + def test_glob_source_json_files(self, test_files, temp_dir): + """Test glob source with JSON files.""" + pattern = os.path.join(temp_dir, "*.json") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all JSON files + json_files = [f for f in test_files if f.endswith(".json")] + assert len(result) == len(json_files) + + for file_content, file_path in result: + assert file_path.endswith(".json") + # Content should be the raw JSON string + assert '"key"' in file_content + + def test_glob_source_no_matches(self, temp_dir): + """Test glob source when pattern matches no files.""" + pattern = os.path.join(temp_dir, "*.nonexistent") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + assert len(result) == 0 + + def test_glob_source_recursive_pattern(self, temp_dir): + """Test glob source with recursive pattern.""" + # Create subdirectory with files + subdir = os.path.join(temp_dir, "subdir") + os.makedirs(subdir, exist_ok=True) + + sub_file = os.path.join(subdir, "sub_file.txt") + with open(sub_file, "w") as f: + f.write("Subdirectory content") + + # Use recursive pattern + pattern = os.path.join(temp_dir, "**", "*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find files in both root and subdirectory + txt_files = [file_path for _, file_path in result] + + # Check that we found files in subdirectory + sub_files = [f for f in txt_files if "subdir" in f] + assert len(sub_files) > 0 + + # Verify content of subdirectory file + sub_result = [ + (content, path) for content, path in result if "sub_file.txt" in path + ] + assert len(sub_result) == 1 + assert sub_result[0][0] == "Subdirectory content" + + def test_glob_source_absolute_vs_relative_paths(self, test_files, temp_dir): + """Test glob source with both absolute and relative paths.""" + # Test with absolute path + abs_pattern = os.path.join(os.path.abspath(temp_dir), "*.txt") + abs_source = GlobSource(abs_pattern) + abs_stream = abs_source() + abs_result = list(abs_stream) + + # Test with relative path (if possible) + current_dir = os.getcwd() + try: + os.chdir(temp_dir) + rel_pattern = "*.txt" + rel_source = GlobSource(rel_pattern) + rel_stream = rel_source() + rel_result = list(rel_stream) + + # Should find the same number of files + assert len(abs_result) == len(rel_result) + + finally: + os.chdir(current_dir) + + def test_glob_source_empty_directory(self, temp_dir): + """Test glob source in empty directory.""" + empty_dir = os.path.join(temp_dir, "empty_subdir") + os.makedirs(empty_dir, exist_ok=True) + + pattern = os.path.join(empty_dir, "*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + assert len(result) == 0 + + def test_glob_source_large_directory(self, temp_dir): + """Test glob source with many files.""" + # Create many files + for i in range(50): + file_path = os.path.join(temp_dir, f"bulk_file_{i:03d}.txt") + with open(file_path, "w") as f: + f.write(f"Content of bulk file {i}") + + pattern = os.path.join(temp_dir, "bulk_file_*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + assert len(result) == 50 + + # Check that files are properly ordered (if implementation sorts) + file_paths = 
[file_path for _, file_path in result] + for i, file_path in enumerate(file_paths): + if "bulk_file_000.txt" in file_path: + # Found the first file, check content + content = [content for content, path in result if path == file_path][0] + assert "Content of bulk file 0" in content + + def test_glob_source_special_characters_in_filenames(self, temp_dir): + """Test glob source with special characters in filenames.""" + # Create files with special characters + special_files = [ + "file with spaces.txt", + "file-with-dashes.txt", + "file_with_underscores.txt", + "file.with.dots.txt", + ] + + for filename in special_files: + file_path = os.path.join(temp_dir, filename) + with open(file_path, "w") as f: + f.write(f"Content of {filename}") + + pattern = os.path.join(temp_dir, "file*.txt") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find all special files plus any existing test files + found_files = [os.path.basename(file_path) for _, file_path in result] + + for special_file in special_files: + assert special_file in found_files + + def test_glob_source_mixed_file_types(self, test_files, temp_dir): + """Test glob source that matches multiple file types.""" + # Pattern that matches both txt and json files + pattern = os.path.join(temp_dir, "file*") + + source = GlobSource(pattern) + stream = source() + + result = list(stream) + + # Should find both text and json files + file_extensions = [os.path.splitext(file_path)[1] for _, file_path in result] + + assert ".txt" in file_extensions + assert ".json" in file_extensions + + def test_glob_source_case_sensitivity(self, temp_dir): + """Test glob source case sensitivity.""" + # Create files with different cases + files = ["Test.TXT", "test.txt", "TEST.txt"] + + for filename in files: + file_path = os.path.join(temp_dir, filename) + with open(file_path, "w") as f: + f.write(f"Content of {filename}") + + # Test exact case match + pattern = os.path.join(temp_dir, "test.txt") + source = GlobSource(pattern) + stream = source() + result = list(stream) + + # Should find at least the exact match + found_files = [os.path.basename(file_path) for _, file_path in result] + assert "test.txt" in found_files + + def test_glob_source_symlinks(self, temp_dir): + """Test glob source with symbolic links (if supported).""" + # Create a regular file + original_file = os.path.join(temp_dir, "original.txt") + with open(original_file, "w") as f: + f.write("Original content") + + try: + # Create a symbolic link + link_file = os.path.join(temp_dir, "link.txt") + os.symlink(original_file, link_file) + + pattern = os.path.join(temp_dir, "*.txt") + source = GlobSource(pattern) + stream = source() + result = list(stream) + + # Should find both original and link + file_paths = [file_path for _, file_path in result] + original_found = any("original.txt" in path for path in file_paths) + link_found = any("link.txt" in path for path in file_paths) + + assert original_found + # Link behavior depends on implementation + + except (OSError, NotImplementedError): + # Symlinks not supported on this system + pass + + def test_glob_source_error_handling(self, temp_dir): + """Test glob source error handling.""" + # Test with invalid pattern + invalid_pattern = "/nonexistent/path/*.txt" + + source = GlobSource(invalid_pattern) + stream = source() + + # Should handle gracefully (empty result or specific error) + try: + result = list(stream) + # If no error, should be empty + assert len(result) == 0 + except (OSError, FileNotFoundError): + # Expected 
error for invalid path + pass + + def test_glob_source_file_permissions(self, temp_dir): + """Test glob source with files of different permissions.""" + # Create a file and try to change permissions + restricted_file = os.path.join(temp_dir, "restricted.txt") + with open(restricted_file, "w") as f: + f.write("Restricted content") + + try: + # Try to make file unreadable + os.chmod(restricted_file, 0o000) + + pattern = os.path.join(temp_dir, "restricted.txt") + source = GlobSource(pattern) + stream = source() + + # Should handle permission errors gracefully + try: + result = list(stream) + # If successful, content might be empty or error + except PermissionError: + # Expected for restricted files + pass + + finally: + # Restore permissions for cleanup + try: + os.chmod(restricted_file, 0o644) + except: + pass diff --git a/tests/test_streams_operations/test_streams/__init__.py b/tests/test_streams_operations/test_streams/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_streams_operations/test_streams/test_base_classes.py b/tests/test_streams_operations/test_streams/test_base_classes.py new file mode 100644 index 0000000..7eecab3 --- /dev/null +++ b/tests/test_streams_operations/test_streams/test_base_classes.py @@ -0,0 +1,514 @@ +""" +Test module for base Stream and SyncStream classes. + +This module tests the fundamental stream functionality including +iteration, flow operations, labeling, key management, and invocation tracking. +""" + +from collections.abc import Collection, Iterator +import pytest +from unittest.mock import Mock, MagicMock +from abc import ABC + +from orcabridge.base import Stream, SyncStream, Operation, Invocation +from orcabridge.mappers import Join +from orcabridge.streams import SyncStreamFromLists, SyncStreamFromGenerator +from orcabridge.types import Tag, Packet + + +class ConcreteStream(Stream): + """Concrete Stream implementation for testing.""" + + def __init__(self, data: Collection[tuple[Tag, Packet]], label=None): + super().__init__(label=label) + self.data = data + + def __iter__(self): + return iter(self.data) + + +class ConcreteSyncStream(SyncStream): + """Concrete SyncStream implementation for testing.""" + + def __init__(self, data: Collection[tuple[Tag, Packet]], label=None): + super().__init__(label=label) + self.data = data + + def __iter__(self) -> Iterator[tuple[Tag, Packet]]: + return iter(self.data) + + +@pytest.fixture +def sample_stream_data(): + """Sample stream data for testing.""" + return [ + ({"id": 1, "type": "text"}, {"content": "Hello", "size": 5}), + ({"id": 2, "type": "text"}, {"content": "World", "size": 5}), + ({"id": 3, "type": "number"}, {"value": 42, "unit": "count"}), + ] + + +@pytest.fixture +def sample_tags_packets(sample_stream_data): + """Extract tags and packets from sample data.""" + tags, packets = zip(*sample_stream_data) + return list(tags), list(packets) + + +class TestStreamBase: + """Test cases for base Stream class.""" + + def test_stream_labels(self, sample_stream_data): + """Test Stream initialization with and without label.""" + + # Without label + stream = ConcreteStream(sample_stream_data) + assert stream.label == "ConcreteStream", ( + f"Label should default to class name {stream.__class__.__name__} but got {stream.label}" + ) + assert stream.invocation is None + + # With label + labeled_stream = ConcreteStream(sample_stream_data, label="test_stream") + assert labeled_stream.label == "test_stream" + + def test_stream_iteration(self, sample_stream_data): + """Test that Stream can be 
iterated over.""" + stream = ConcreteStream(sample_stream_data) + + result = list(stream) + assert result == sample_stream_data + + # Test multiple iterations + result2 = list(stream) + assert result2 == sample_stream_data + + def test_stream_flow(self, sample_stream_data): + """Test Stream.flow() method.""" + stream = ConcreteStream(sample_stream_data) + + flowed = stream.flow() + assert flowed == sample_stream_data + assert isinstance(flowed, list) + + def test_stream_identity_structure(self, sample_stream_data): + """Test Stream identity structure.""" + stream = ConcreteStream(sample_stream_data) + + # Default identity structure for uninvoked stream should be None + identity = stream.identity_structure() + # TODO: consider alternative behavior for identity structure for streams + assert identity is None + + def test_stream_keys_default(self, sample_stream_data): + """Test Stream keys method default behavior.""" + stream = ConcreteStream(sample_stream_data) + + tag_keys, packet_keys = stream.keys() + # Default implementation will be based on the first sample from the stream + assert tag_keys is not None and set(tag_keys) == set(["id", "type"]) + + assert packet_keys is not None and set(packet_keys) == set(["content", "size"]) + + def test_stream_repr(self, sample_stream_data): + """Test Stream string representation.""" + stream = ConcreteStream(sample_stream_data, label="test_stream") + + repr_str = repr(stream) + assert "ConcreteStream" in repr_str + assert "test_stream" in repr_str + + +class TestSyncStreamBase: + """Test cases for SyncStream base class.""" + + def test_syncstream_initialization(self, sample_stream_data): + """Test SyncStream initialization.""" + sync_stream = ConcreteSyncStream(sample_stream_data) + + assert isinstance(sync_stream, Stream) + assert isinstance(sync_stream, SyncStream) + + def test_syncstream_rshift_operator_dict(self, sample_stream_data): + """Test SyncStream >> operator with dictionary mapping.""" + sync_stream = SyncStreamFromLists(paired=sample_stream_data) + + # Test with dictionary (should use MapPackets) + mapping = {"content": "text", "size": "length"} + mapped_stream = sync_stream >> mapping + + assert isinstance(mapped_stream, SyncStream) + result = list(mapped_stream) + + # Check that mapping was applied + for (tag, packet), (ref_tag, ref_packet) in zip(result, sample_stream_data): + if "content" in ref_packet: + assert "text" in packet + assert packet["text"] == ref_packet["content"] + if "size" in ref_packet: + assert "length" in packet + assert packet["length"] == ref_packet["size"] + + def test_syncstream_rshift_operator_callable(self, sample_stream_data): + """Test SyncStream >> operator with callable transformer.""" + sync_stream = SyncStreamFromLists(paired=sample_stream_data) + + def add_processed_flag(stream): + """Add processed flag to all packets.""" + + def generator(): + for tag, packet in stream: + yield tag, {**packet, "processed": True} + + return SyncStreamFromGenerator(generator) + + transformed = sync_stream >> add_processed_flag + result = list(transformed) + + # Check that all packets have processed flag + for _, packet in result: + assert packet["processed"] is True + + def test_syncstream_mul_operator(self, sample_tags_packets): + """Test SyncStream * operator for joining streams.""" + tags1, packets1 = sample_tags_packets + stream1 = SyncStreamFromLists(tags1[:2], packets1[:2]) + + tags2 = [{"id": 1, "category": "A"}, {"id": 2, "category": "B"}] + packets2 = [{"priority": "high"}, {"priority": "low"}] + stream2 = 
SyncStreamFromLists(tags2, packets2)
+
+        # Test join operation
+        joined = stream1 * stream2
+
+        assert joined.invocation is not None and isinstance(
+            joined.invocation.operation, Join
+        ), (
+            f"* operator should result from a Join invocation but got {type(joined)}"
+        )
+        result = list(joined)
+
+        # Should have joined results where tags match
+        assert len(result) >= 0  # Exact count depends on tag matching logic
+
+    def test_syncstream_mul_operator_type_error(self, sample_tags_packets):
+        """Test SyncStream * operator with invalid type."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        with pytest.raises(TypeError, match="other must be a SyncStream"):
+            sync_stream * "not_a_stream"  # type: ignore
+
+    def test_syncstream_rshift_invalid_type(self, sample_tags_packets):
+        """Test SyncStream >> operator with invalid transformer type."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        # Should handle non-dict, non-callable gracefully or raise appropriate error
+        with pytest.raises((TypeError, AttributeError)):
+            sync_stream >> 123  # type: ignore
+
+    def test_syncstream_chaining_operations(self, sample_tags_packets):
+        """Test chaining multiple SyncStream operations."""
+        tags, packets = sample_tags_packets
+        sync_stream = SyncStreamFromLists(tags, packets)
+
+        # Chain multiple transformations
+        def add_flag(stream):
+            def generator():
+                for tag, packet in stream:
+                    yield tag, {**packet, "chained": True}
+
+            return SyncStreamFromGenerator(generator)
+
+        def add_counter(stream):
+            def generator():
+                for i, (tag, packet) in enumerate(stream):
+                    yield tag, {**packet, "counter": i}
+
+            return SyncStreamFromGenerator(generator)
+
+        result_stream = sync_stream >> add_flag >> add_counter
+        result = list(result_stream)
+
+        # Check that both transformations were applied
+        for i, (tag, packet) in enumerate(result):
+            assert packet["chained"] is True
+            assert packet["counter"] == i
+
+
+class TestSyncStreamFromLists:
+    """Test cases for SyncStreamFromLists implementation."""
+
+    def test_creation_from_lists(self, sample_tags_packets):
+        """Test SyncStreamFromLists creation."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        assert isinstance(stream, SyncStream)
+        result = list(stream)
+
+        expected = list(zip(tags, packets))
+        assert result == expected
+
+    def test_creation_with_mismatched_lengths(self):
+        """Test SyncStreamFromLists with mismatched tag/packet lengths."""
+        tags = [{"id": "1"}, {"id": "2"}]
+        packets = [{"data": "a"}]  # One less packet
+
+        # If strict (default), should raise a ValueError
+        with pytest.raises(ValueError):
+            stream = SyncStreamFromLists(tags, packets, strict=True)
+
+        # If not strict, should handle gracefully and create based on the shortest length
+        stream = SyncStreamFromLists(tags, packets, strict=False)
+        result = list(stream)
+
+        assert len(result) == 1
+        assert result[0] == ({"id": "1"}, {"data": "a"})
+
+    def test_empty_lists(self):
+        """Test SyncStreamFromLists with empty lists."""
+        stream = SyncStreamFromLists([], [])
+        result = list(stream)
+
+        assert result == []
+
+    def test_keys_inference(self, sample_tags_packets):
+        """Test key inference from tag and packet data."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        tag_keys, packet_keys = stream.keys()
+
+        # Should infer keys from the first element
+        expected_tag_keys = set()
+        expected_packet_keys = set()
+
+        if tags:
expected_tag_keys.update(tags[0].keys()) + if packets: + expected_packet_keys.update(packets[0].keys()) + + assert tag_keys is not None and set(tag_keys) == expected_tag_keys + assert packet_keys is not None and set(packet_keys) == expected_packet_keys + + def test_multiple_iterations(self, sample_tags_packets): + """Test that SyncStreamFromLists can be iterated multiple times.""" + tags, packets = sample_tags_packets + stream = SyncStreamFromLists(tags, packets) + + result1 = list(stream) + result2 = list(stream) + + assert result1 == result2 + assert len(result1) == len(tags) + + +class TestSyncStreamFromGenerator: + """Test cases for SyncStreamFromGenerator implementation.""" + + def test_creation_from_generator(self, sample_stream_data): + """Test SyncStreamFromGenerator creation.""" + + def generator(): + for item in sample_stream_data: + yield item + + stream = SyncStreamFromGenerator(generator) + assert isinstance(stream, SyncStream) + + result = list(stream) + assert result == sample_stream_data + + def test_generator_multiple_iterations(self, sample_stream_data): + """Test that generator-based streams can be iterated multiple times""" + + def generator(): + for item in sample_stream_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # First iteration should work + result1 = list(stream) + assert result1 == sample_stream_data + + # Second iteration should work (new iterator instance) + result2 = list(stream) + assert result2 == sample_stream_data + + def test_empty_generator(self): + """Test SyncStreamFromGenerator with empty generator.""" + + def empty_generator(): + return + yield # This line is never reached + + stream = SyncStreamFromGenerator(empty_generator) + result = list(stream) + + assert result == [] + + def test_generator_with_exception(self): + """Test SyncStreamFromGenerator with generator that raises exception.""" + + def failing_generator(): + yield ({"id": "1"}, {"data": "ok"}) + raise ValueError("Generator failed") + + stream = SyncStreamFromGenerator(failing_generator) + + # Should propagate the exception + with pytest.raises(ValueError, match="Generator failed"): + list(stream) + + def test_lazy_evaluation(self): + """Test that SyncStreamFromGenerator is lazily evaluated.""" + call_count = {"count": 0} + + def counting_generator(): + call_count["count"] += 1 + yield ({"id": "1"}, {"data": "test"}) + + stream = SyncStreamFromGenerator(counting_generator) + + # Generator should not be called until iteration starts + assert call_count["count"] == 0 + + # Start iteration + iterator = iter(stream) + next(iterator) + + # Now generator should have been called + assert call_count["count"] == 1 + + def test_inferred_keys_with_generator(self): + """Test key inference with generator streams.""" + + def sample_generator(): + yield ({"id": "1", "type": "A"}, {"value": "10", "name": "test"}) + yield ({"id": "2", "type": "B"}, {"value": "20", "size": "5"}) + + stream = SyncStreamFromGenerator(sample_generator) + + # Keys should be inferred from generated data + tag_keys, packet_keys = stream.keys() + + # Note: This depends on implementation - may need to consume stream + # to infer keys, or may return None + if tag_keys is not None: + assert "id" in tag_keys + assert "type" in tag_keys + + if packet_keys is not None: + assert "value" in packet_keys + + def test_specified_keys_with_generator(self): + """Test key inference with generator streams.""" + + def sample_generator(): + yield ({"id": "1", "type": "A"}, {"value": "10", "name": "test"}) + yield ({"id": 
"2", "type": "B"}, {"value": "20", "size": "5"}) + + # Specify keys explicitly -- it need not match the actual content + stream = SyncStreamFromGenerator( + sample_generator, tag_keys=["id"], packet_keys=["group"] + ) + + # Keys should be based on what was specified at the construction + tag_keys, packet_keys = stream.keys() + + # Note: This depends on implementation - may need to consume stream + # to infer keys, or may return None + if tag_keys is not None: + assert "id" in tag_keys + assert "type" not in tag_keys + + if packet_keys is not None: + assert "value" not in packet_keys + assert "group" in packet_keys + + +class TestStreamIntegration: + """Integration tests for stream functionality.""" + + def test_stream_composition(self, sample_tags_packets): + """Test composing different stream types.""" + tags, packets = sample_tags_packets + + # Create streams from different sources + list_stream = SyncStreamFromLists(tags[:2], packets[:2]) + + def gen_func(): + yield tags[2], packets[2] + + gen_stream = SyncStreamFromGenerator(gen_func) + + # Both should work similarly + list_result = list(list_stream) + gen_result = list(gen_stream) + + assert len(list_result) == 2 + assert len(gen_result) == 1 + + # Combine results + all_data = list_result + gen_result + assert len(all_data) == 3 + + def test_stream_with_complex_data(self): + """Test streams with complex nested data.""" + complex_tags = [ + {"id": 1, "metadata": {"type": "nested", "level": 1}}, + {"id": 2, "metadata": {"type": "nested", "level": 2}}, + ] + complex_packets = [ + {"data": {"values": [1, 2, 3], "config": {"enabled": True}}}, + {"data": {"values": [4, 5, 6], "config": {"enabled": False}}}, + ] + + stream = SyncStreamFromLists(complex_tags, complex_packets) + result = list(stream) + + assert len(result) == 2 + + # Verify complex data is preserved + tag, packet = result[0] + assert tag["metadata"]["type"] == "nested" + assert packet["data"]["values"] == [1, 2, 3] + assert packet["data"]["config"]["enabled"] is True + + def test_stream_memory_efficiency(self): + """Test that generator streams don't consume excessive memory.""" + + def large_generator(): + for i in range(1000): + yield ({"id": i}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(large_generator) + + # Process in chunks to test memory efficiency + count = 0 + for tag, packet in stream: + count += 1 + if count > 10: # Just test first few items + break + + assert count == 11 # Processed 11 items + + def test_stream_error_propagation(self, sample_tags_packets): + """Test that errors in stream data are properly propagated.""" + tags, packets = sample_tags_packets + + # Create stream with invalid data + invalid_tags = tags + [None] # Add invalid tag + invalid_packets = packets + [{"data": "valid"}] + + stream = SyncStreamFromLists(invalid_tags, invalid_packets) + + # Should handle None tags gracefully or raise appropriate error + result = list(stream) + + # The None tag should be included as-is + assert len(result) == 4 + assert result[-1] == (None, {"data": "valid"}) diff --git a/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py b/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py new file mode 100644 index 0000000..3b64887 --- /dev/null +++ b/tests/test_streams_operations/test_streams/test_sync_stream_implementations.py @@ -0,0 +1,578 @@ +""" +Test module for SyncStream concrete implementations. 
+
+This module tests the specific implementations of SyncStream including
+SyncStreamFromLists and SyncStreamFromGenerator, focusing on their unique
+behaviors, performance characteristics, and edge cases.
+"""
+
+import pytest
+from unittest.mock import Mock, patch
+import gc
+
+from orcabridge.streams import SyncStreamFromLists, SyncStreamFromGenerator
+from orcabridge.base import SyncStream
+
+
+@pytest.fixture
+def sample_data():
+    """Sample data for stream testing."""
+    return [
+        ({"id": 1, "type": "doc"}, {"content": "Hello", "size": 5}),
+        ({"id": 2, "type": "doc"}, {"content": "World", "size": 5}),
+        ({"id": 3, "type": "img"}, {"pixels": 1920 * 1080, "format": "png"}),
+    ]
+
+
+@pytest.fixture
+def sample_tags_packets(sample_data):
+    """Extract tags and packets separately."""
+    tags, packets = zip(*sample_data)
+    return list(tags), list(packets)
+
+
+class TestSyncStreamFromLists:
+    """Comprehensive tests for SyncStreamFromLists implementation."""
+
+    def test_basic_creation_and_iteration(self, sample_tags_packets):
+        """Test basic creation and iteration functionality."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        # Test basic properties
+        assert isinstance(stream, SyncStream)
+
+        # Test iteration
+        result = list(stream)
+        expected = list(zip(tags, packets))
+        assert result == expected
+
+    def test_creation_with_empty_lists(self):
+        """Test creation with empty tag and packet lists."""
+        stream = SyncStreamFromLists([], [])
+
+        result = list(stream)
+        assert result == []
+
+        # Test keys with empty stream
+        tag_keys, packet_keys = stream.keys()
+        assert tag_keys == []
+        assert packet_keys == []
+
+    def test_creation_with_single_item(self):
+        """Test creation with single tag-packet pair."""
+        tags = [{"id": 1}]
+        packets = [{"data": "test"}]
+        stream = SyncStreamFromLists(tags, packets)
+
+        result = list(stream)
+        assert result == [({"id": 1}, {"data": "test"})]
+
+    def test_mismatched_list_lengths(self):
+        """Test behavior with different length tag and packet lists."""
+        tags = [{"id": 1}, {"id": 2}, {"id": 3}]
+        packets = [{"data": "a"}, {"data": "b"}]  # Shorter list
+
+        stream = SyncStreamFromLists(tags, packets, strict=False)
+        result = list(stream)
+
+        # Should zip to shortest length
+        assert len(result) == 2
+        assert result == [
+            ({"id": 1}, {"data": "a"}),
+            ({"id": 2}, {"data": "b"}),
+        ]
+
+    def test_keys_inference_comprehensive(self):
+        """Test comprehensive key inference from data."""
+        tags = [
+            {"id": 1, "type": "A", "category": "test"},
+            {"id": 2, "type": "B"},  # Missing category
+            {"id": 3, "category": "prod", "extra": "value"},  # Missing type, has extra
+        ]
+        packets = [
+            {"data": "hello", "size": 5, "meta": {"info": "test"}},
+            {"data": "world", "count": 10},  # Missing size, meta; has count
+            {"size": 3, "format": "json"},  # Missing data; has format
+        ]
+
+        stream = SyncStreamFromLists(tags, packets)
+        tag_keys, packet_keys = stream.keys()
+
+        # Should include all keys found across all items
+        expected_tag_keys = {"id", "type", "category", "extra"}
+        expected_packet_keys = {"data", "size", "meta", "count", "format"}
+
+        assert set(tag_keys) == expected_tag_keys
+        assert set(packet_keys) == expected_packet_keys
+
+    def test_multiple_iterations_consistency(self, sample_tags_packets):
+        """Test that multiple iterations return consistent results."""
+        tags, packets = sample_tags_packets
+        stream = SyncStreamFromLists(tags, packets)
+
+        # Multiple iterations should be identical
+        result1 = list(stream)
+        result2 = list(stream)
+        result3 = 
list(stream) + + assert result1 == result2 == result3 + assert len(result1) == len(tags) + + def test_iteration_with_generators_as_input(self): + """Test creation with generator inputs (should work since converted to lists).""" + + def tag_gen(): + for i in range(3): + yield {"id": i} + + def packet_gen(): + for i in range(3): + yield {"value": i * 10} + + # Should accept generators and convert them + stream = SyncStreamFromLists(list(tag_gen()), list(packet_gen())) + result = list(stream) + + assert len(result) == 3 + assert result[0] == ({"id": 0}, {"value": 0}) + assert result[1] == ({"id": 1}, {"value": 10}) + assert result[2] == ({"id": 2}, {"value": 20}) + + def test_memory_efficiency_large_lists(self): + """Test memory efficiency with large lists.""" + # Create large but not excessive lists + size = 1000 + tags = [{"id": i} for i in range(size)] + packets = [{"value": i * 2} for i in range(size)] + + stream = SyncStreamFromLists(tags, packets) + + # Should be able to iterate without memory issues + count = 0 + for tag, packet in stream: + count += 1 + assert tag["id"] == packet["value"] // 2 + + assert count == size + + def test_data_types_preservation(self): + """Test that various data types are preserved correctly.""" + tags = [ + {"int": 42, "float": 3.14, "str": "hello"}, + {"bool": True, "none": None, "list": [1, 2, 3]}, + {"dict": {"nested": "value"}, "tuple": (1, 2)}, + ] + packets = [ + {"complex": 1 + 2j, "bytes": b"binary", "set": {1, 2, 3}}, + {"lambda": lambda x: x * 2}, # Function objects + {"custom": {"deep": {"nesting": {"value": 123}}}}, + ] + + stream = SyncStreamFromLists(tags, packets) + result = list(stream) + + # Verify data type preservation + assert result[0][0]["int"] == 42 + assert result[0][0]["float"] == 3.14 + assert result[0][1]["complex"] == 1 + 2j + assert result[0][1]["bytes"] == b"binary" + + assert result[1][0]["bool"] is True + assert result[1][0]["none"] is None + assert callable(result[1][1]["lambda"]) + + assert result[2][0]["dict"]["nested"] == "value" + assert result[2][1]["custom"]["deep"]["nesting"]["value"] == 123 + + def test_mutable_data_safety(self): + """Test that mutable data doesn't cause unexpected sharing.""" + shared_dict = {"shared": "value"} + tags = [{"ref": shared_dict}, {"ref": shared_dict}] + packets = [{"data": "a"}, {"data": "b"}] + + stream = SyncStreamFromLists(tags, packets) + result = list(stream) + + # Modify the shared dict + shared_dict["shared"] = "modified" + + # The stream results should reflect the change (references preserved) + assert result[0][0]["ref"]["shared"] == "modified" + assert result[1][0]["ref"]["shared"] == "modified" + + def test_label_and_metadata(self, sample_tags_packets): + """Test stream labeling and metadata handling.""" + tags, packets = sample_tags_packets + + # Test with custom label + stream = SyncStreamFromLists(tags, packets, label="test_stream") + assert stream.label == "test_stream" + + # Test default label generation + stream_auto = SyncStreamFromLists(tags, packets) + assert "SyncStreamFromLists_" in stream_auto.label + + +class TestSyncStreamFromGenerator: + """Comprehensive tests for SyncStreamFromGenerator implementation.""" + + def test_basic_creation_and_iteration(self, sample_data): + """Test basic creation and iteration functionality.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + assert isinstance(stream, SyncStream) + + result = list(stream) + assert result == sample_data + + def test_empty_generator(self): 
+ """Test with generator that yields nothing.""" + + def empty_gen(): + return + yield # Never reached + + stream = SyncStreamFromGenerator(empty_gen) + result = list(stream) + assert result == [] + + def test_single_item_generator(self): + """Test with generator that yields single item.""" + + def single_gen(): + yield ({"id": 1}, {"data": "test"}) + + stream = SyncStreamFromGenerator(single_gen) + result = list(stream) + assert result == [({"id": 1}, {"data": "test"})] + + def test_generator_exhaustion(self, sample_data): + """Test that generators are exhausted after iteration.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # First iteration consumes generator + result1 = list(stream) + assert result1 == sample_data + + # Second iteration gets empty results (generator exhausted) + result2 = list(stream) + assert result2 == [] + + def test_lazy_evaluation(self): + """Test that generator evaluation is lazy.""" + call_log = [] + + def tracking_generator(): + call_log.append("generator_started") + for i in range(3): + call_log.append(f"yielding_{i}") + yield ({"id": i}, {"value": i * 10}) + call_log.append("generator_finished") + + stream = SyncStreamFromGenerator(tracking_generator) + + # Generator should not have started yet + assert call_log == [] + + # Start iteration but don't consume everything + iterator = iter(stream) + next(iterator) + + # Should have started and yielded first item + assert "generator_started" in call_log + assert "yielding_0" in call_log + assert "yielding_1" not in call_log + + def test_generator_with_exception(self): + """Test generator that raises exception during iteration.""" + + def failing_generator(): + yield ({"id": 1}, {"data": "ok"}) + yield ({"id": 2}, {"data": "ok"}) + raise ValueError("Something went wrong") + yield ({"id": 3}, {"data": "never_reached"}) + + stream = SyncStreamFromGenerator(failing_generator) + + # Should propagate exception + with pytest.raises(ValueError, match="Something went wrong"): + list(stream) + + def test_generator_partial_consumption(self, sample_data): + """Test partial consumption of generator.""" + + def generator(): + for item in sample_data: + yield item + + stream = SyncStreamFromGenerator(generator) + + # Consume only part of the stream + iterator = iter(stream) + first_item = next(iterator) + second_item = next(iterator) + + assert first_item == sample_data[0] + assert second_item == sample_data[1] + + # Rest of generator should still be available + remaining = list(iterator) + assert remaining == sample_data[2:] + + def test_generator_with_infinite_sequence(self): + """Test generator with infinite sequence (partial consumption).""" + + def infinite_generator(): + i = 0 + while True: + yield ({"id": i}, {"value": i * i}) + i += 1 + + stream = SyncStreamFromGenerator(infinite_generator) + + # Consume just first few items + iterator = iter(stream) + results = [] + for _ in range(5): + results.append(next(iterator)) + + assert len(results) == 5 + assert results[0] == ({"id": 0}, {"value": 0}) + assert results[4] == ({"id": 4}, {"value": 16}) + + def test_generator_with_complex_logic(self): + """Test generator with complex internal logic.""" + + def complex_generator(): + # Generator with state and complex logic + state = {"count": 0, "filter_odd": True} + + for i in range(10): + state["count"] += 1 + + if state["filter_odd"] and i % 2 == 1: + continue # Skip odd numbers initially + + if i == 6: # Change behavior mid-stream + state["filter_odd"] = 
False + + yield ({"id": i, "count": state["count"]}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(complex_generator) + result = list(stream) + + # Should have skipped odds initially, then included them + ids = [item[0]["id"] for item in result] + assert 0 in ids and 2 in ids and 4 in ids and 6 in ids # Evens + assert 1 not in ids and 3 not in ids and 5 not in ids # Early odds skipped + assert 7 in ids and 8 in ids and 9 in ids # Later odds included + + def test_keys_inference_limitation(self): + """Test that key inference may be limited for generators.""" + + def generator(): + yield ({"id": 1, "type": "A"}, {"data": "hello", "size": 5}) + yield ({"id": 2, "type": "B"}, {"data": "world", "count": 10}) + + stream = SyncStreamFromGenerator(generator) + + # Keys might not be available without consuming stream + tag_keys, packet_keys = stream.keys() + + # Implementation-dependent: might be None or inferred + if tag_keys is not None: + assert isinstance(tag_keys, (list, tuple, set)) + if packet_keys is not None: + assert isinstance(packet_keys, (list, tuple, set)) + + def test_memory_efficiency(self): + """Test memory efficiency of generator streams.""" + + def memory_efficient_generator(): + # Generate large number of items without storing them all + for i in range(10000): + yield ({"id": i}, {"value": i * 2}) + + stream = SyncStreamFromGenerator(memory_efficient_generator) + + # Process in chunks to verify memory efficiency + count = 0 + for tag, packet in stream: + count += 1 + assert tag["id"] == packet["value"] // 2 + + if count >= 100: # Don't process all 10k items in test + break + + assert count == 100 + + def test_generator_function_vs_generator_object(self, sample_data): + """Test creation with generator function vs generator object.""" + + def gen_function(): + for item in sample_data: + yield item + + # Test with generator function (should work) + stream1 = SyncStreamFromGenerator(gen_function) + result1 = list(stream1) + + # Test with generator object (should work) + gen_object = gen_function() + stream2 = SyncStreamFromGenerator(lambda: gen_object) + result2 = list(stream2) + + assert result1 == sample_data + assert result2 == sample_data + + def test_label_and_metadata(self, sample_data): + """Test stream labeling and metadata handling.""" + + def generator(): + for item in sample_data: + yield item + + # Test with custom label + stream = SyncStreamFromGenerator(generator, label="test_gen_stream") + assert stream.label == "test_gen_stream" + + # Test default label generation + stream_auto = SyncStreamFromGenerator(generator) + assert "SyncStreamFromGenerator_" in stream_auto.label + + +class TestStreamImplementationComparison: + """Tests comparing different stream implementations.""" + + def test_equivalent_output(self, sample_data): + """Test that both implementations produce equivalent output for same data.""" + tags, packets = zip(*sample_data) + + # Create streams from same data using different implementations + list_stream = SyncStreamFromLists(list(tags), list(packets)) + + def generator(): + for item in sample_data: + yield item + + gen_stream = SyncStreamFromGenerator(generator) + + # Results should be identical + list_result = list(list_stream) + gen_result = list(gen_stream) + + assert list_result == gen_result == sample_data + + def test_multiple_iteration_behavior(self, sample_data): + """Test different behavior in multiple iterations.""" + tags, packets = zip(*sample_data) + + list_stream = SyncStreamFromLists(list(tags), list(packets)) + + def generator(): 
+            for item in sample_data:
+                yield item
+
+        gen_stream = SyncStreamFromGenerator(generator)
+
+        # List stream should support multiple iterations
+        list_result1 = list(list_stream)
+        list_result2 = list(list_stream)
+        assert list_result1 == list_result2
+
+        # Generator stream should only work once
+        gen_result1 = list(gen_stream)
+        gen_result2 = list(gen_stream)
+        assert gen_result1 == sample_data
+        assert gen_result2 == []  # Exhausted
+
+    def test_performance_characteristics(self):
+        """Test performance characteristics of different implementations."""
+        import time
+
+        size = 1000
+        tags = [{"id": i} for i in range(size)]
+        packets = [{"value": i * 2} for i in range(size)]
+
+        # Time list-based stream creation and consumption
+        start = time.time()
+        list_stream = SyncStreamFromLists(tags, packets)
+        list_result = list(list_stream)
+        list_time = time.time() - start
+
+        # Time generator-based stream creation and consumption
+        def generator():
+            for tag, packet in zip(tags, packets):
+                yield tag, packet
+
+        start = time.time()
+        gen_stream = SyncStreamFromGenerator(generator)
+        gen_result = list(gen_stream)
+        gen_time = time.time() - start
+
+        # Results should be equivalent
+        assert list_result == gen_result
+
+        # Both should complete in reasonable time (implementation dependent)
+        assert list_time < 1.0  # Should be fast
+        assert gen_time < 1.0  # Should be fast
+
+    def test_error_handling_consistency(self):
+        """Test that error handling is consistent between implementations."""
+
+        def failing_generator():
+            yield ({"id": 1}, {"data": "ok"})
+            raise RuntimeError("Generator error")
+
+        # Generator stream should propagate error
+        gen_stream = SyncStreamFromGenerator(failing_generator)
+        with pytest.raises(RuntimeError, match="Generator error"):
+            list(gen_stream)
+
+        # List stream with problematic data
+        tags = [{"id": 1}, None]  # None tag might cause issues
+        packets = [{"data": "ok"}, {"data": "also_ok"}]
+
+        list_stream = SyncStreamFromLists(tags, packets)
+        result = list(list_stream)  # Should handle None gracefully
+
+        assert len(result) == 2
+        assert result[1] == (None, {"data": "also_ok"})
+
+    def test_integration_with_operations(self, sample_data):
+        """Test that both stream types work equivalently with operations."""
+        from orcabridge.mappers import Filter
+
+        tags, packets = zip(*sample_data)
+
+        # Create equivalent streams
+        list_stream = SyncStreamFromLists(list(tags), list(packets))
+
+        def generator():
+            for item in sample_data:
+                yield item
+
+        gen_stream = SyncStreamFromGenerator(generator)
+
+        # Apply same operation to both
+        filter_op = Filter(lambda tag, packet: tag["id"] > 1)
+
+        filtered_list = filter_op(list_stream)
+        filtered_gen = filter_op(gen_stream)
+
+        list_result = list(filtered_list)
+        gen_result = list(filtered_gen)
+
+        # Results should be equivalent
+        assert list_result == gen_result
+        assert len(list_result) == 2  # Should have filtered out id=1
diff --git a/uv.lock b/uv.lock
index 23ca96e..26e122a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2,10 +2,20 @@ version = 1
 revision = 2
 requires-python = ">=3.10"
 resolution-markers = [
-    "python_full_version >= '3.11'",
+    "python_full_version >= '3.12'",
+    "python_full_version == '3.11.*'",
     "python_full_version < '3.11'",
 ]
 
+[[package]]
+name = "annotated-types"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = 
"sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "appnope" version = "0.1.4" @@ -15,6 +25,80 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321, upload-time = "2024-02-06T09:43:09.663Z" }, ] +[[package]] +name = "arro3-core" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/eb/2a166478dfc951958bf4cd33891bfa346ab9c53c3a87f5ffe99dbe981577/arro3_core-0.5.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a85c4d78fb4a3e3b216b01e44ac16121a06e80169555cd0f7b8fcf038a6c14b3", size = 2448695, upload-time = "2025-05-31T23:17:55.526Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c0/2b1719accd4cc2f81bd36ad79a16750a63e0d7a5132e43115b586d52e21d/arro3_core-0.5.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2dd7a3b78c8936407e4eebbbe3134410d1be0c51fb697a8b8a5c8118690190a9", size = 2155415, upload-time = "2025-05-31T23:17:57.992Z" }, + { url = "https://files.pythonhosted.org/packages/9c/dc/6bcb859c4a83fff95b2ccc906c027db1f0396610a57bafc90bd933dcce83/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fae76973505d64cebf26a30c78d37a5a1fe012b3d6a6c682fea33ebd1dfc4d99", size = 2594341, upload-time = "2025-05-31T23:18:01.536Z" }, + { url = "https://files.pythonhosted.org/packages/6f/48/109cf08ca7532636d4c356a421e1620e7b01fb6882e12b6afbfa4b933c38/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c65d5ffb89cf9bcc62bb7f64beb049877ca03b504841ffc3cab6e853a13637c", size = 2637344, upload-time = "2025-05-31T23:18:05.307Z" }, + { url = "https://files.pythonhosted.org/packages/b2/4b/5a9dfc81195c8fcf2f99f9cb8f3d8c23ca9da541964d44e409a01ab06d3b/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad9e3e69a0888bf1cd2c9cf2e7d60787ac9bf3b9508937bfb6ff55aba9a6b56b", size = 2878497, upload-time = "2025-05-31T23:18:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/f1/26/a2a0685f3648afb20bbe4920cee6dc8a29b9942fa8c0190f6a8fc3ad4ef3/arro3_core-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36484d31141691c48d6e48f1c387d3b19fe5a814ffcde26b2ac04ebe68f81c76", size = 2540359, upload-time = "2025-05-31T23:18:12.092Z" }, + { url = "https://files.pythonhosted.org/packages/64/40/6b22f0f094d905d610945a9b7d4662d5f143f6638c37e89fb888443aee64/arro3_core-0.5.1-cp310-cp310-manylinux_2_24_aarch64.whl", hash = "sha256:78942ee33f55758ce0138b30377185f2d93b9221fb5c239075b56159b3e3fb5b", size = 2289699, upload-time = "2025-05-31T23:18:15.895Z" }, + { url = "https://files.pythonhosted.org/packages/cf/46/eebe9826aeca54bc04bf8ed6e9506134dcf1d02a960482b0164a98d51800/arro3_core-0.5.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:063b9ffe896dbd01649eb46d04b55f19eb6bc7fa505d1781d64308e13a2510cc", size = 2723968, upload-time = "2025-05-31T23:18:19.597Z" }, + { url = "https://files.pythonhosted.org/packages/90/bc/5c2361010692854efb47211e15eeeb9cef02eb037dbb95b9dd68b4554ba7/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a818344b61b59c09c3f6550c03e5b01678535160b35d38eaa5f988667df69187", size = 2435669, upload-time = "2025-05-31T23:18:22.649Z" }, + { url = "https://files.pythonhosted.org/packages/39/0d/1fef7dcca81696bdea0e79971155b114fb3fb204f177eed25a07f856f57a/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:60fa11fe05f3b48e7b37c1d4f12d94ef678514d2e908033ac30d10d04b1bd957", size = 2869358, upload-time = "2025-05-31T23:18:27.008Z" }, + { url = "https://files.pythonhosted.org/packages/cc/02/1196e7f795658a5ef7c4b5811fe84845025e7baf391d05be36e763336156/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8d1ce524ca27598154f84cf980c6fa4baf0c1379584de2e922e88905dfb939dd", size = 2797000, upload-time = "2025-05-31T23:18:30.694Z" }, + { url = "https://files.pythonhosted.org/packages/1c/ea/31bc0bc32ad3e22a937c866b685e0b1123f4747dabc23703531d7626a5d2/arro3_core-0.5.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2546df75769b60bbd74aa7a169cd538e909aabf2200a99edfdda542e560b5c11", size = 2709346, upload-time = "2025-05-31T23:18:34.125Z" }, + { url = "https://files.pythonhosted.org/packages/fb/2c/6bfb3a4cd26b1fed099767e124063f0b4fe5e7f0cab0160004ba5900cad0/arro3_core-0.5.1-cp310-cp310-win_amd64.whl", hash = "sha256:d89350dc36f58c9c0fb941fbcd46e2e00f76f3438844ef3dce2419ce64631739", size = 2611596, upload-time = "2025-05-31T23:18:37.826Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/c2540f04330f52f431a0ca0824c15d86fc38dd8b3f2af027a41a90ea91e7/arro3_core-0.5.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:e6c43f2f59cd43044663969031c4ef29aab76247b5bda74800187a8b9bda3b9e", size = 2448953, upload-time = "2025-05-31T23:18:40.996Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8f/9fc60dcc201f72f3d9d2ca86b61ff374eb640b58a65660b8de2ac53654d6/arro3_core-0.5.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:006214e68df6f66bbd1712989258cac2b307085627962348749cc2802b843f25", size = 2155535, upload-time = "2025-05-31T23:18:44.178Z" }, + { url = "https://files.pythonhosted.org/packages/5e/9e/4e6a3c41b52b08b8f34f7830df2a0e499d3e4ab43c6d45984e2af13fa780/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:be77d366d43025599a5a0c520cced43c181f750cf6bcc174a72a97a7338f9e37", size = 2594752, upload-time = "2025-05-31T23:18:47.586Z" }, + { url = "https://files.pythonhosted.org/packages/bd/77/94d8099c8fbfe3489ec92da76f65844b276f82b18d9cb6a547a717bd38cc/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ca7cba980b3d2e3552dd06da67c8c298d970bd9430ed661a2316c893bfca3873", size = 2637291, upload-time = "2025-05-31T23:18:50.539Z" }, + { url = "https://files.pythonhosted.org/packages/ff/22/050c75161bcbe2e6b3ff5f8de11f760a376523fa905f4787b09bab65a4b5/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1866f014ca091049692d81601760b65fdad7b779d9c73698f709cd6ee4e8b5c3", size = 2869405, upload-time = "2025-05-31T23:18:53.73Z" }, + { url = "https://files.pythonhosted.org/packages/ac/88/87a3293db47dab5b23ecd910532f02c56d15f52920fc5d72404935126345/arro3_core-0.5.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:2e1433e98b4385f2565c59d69c1bbb4f18da7d2693d2d9712e219e020e8f9025", size = 2540544, upload-time = "2025-05-31T23:18:56.954Z" }, + { url = "https://files.pythonhosted.org/packages/71/e8/f85ce3be71c967b24e96c3af589ae3390548ab0d9fd69d5ed535225fd620/arro3_core-0.5.1-cp311-cp311-manylinux_2_24_aarch64.whl", hash = "sha256:afba61734d4fc772ddf26888c299f94157e530a080835a981431a37398168fd6", size = 2289505, upload-time = "2025-05-31T23:19:00.354Z" }, + { url = "https://files.pythonhosted.org/packages/9c/4b/432eb5135fbcc5d8770ad7bd4193545e97588caf1f690d4f724bbb927632/arro3_core-0.5.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:69b8885acf0e94b54adb6f060b4c41ee138d171b37a6356b690bece6b911565d", size = 2724357, upload-time = "2025-05-31T23:19:04.201Z" }, + { url = "https://files.pythonhosted.org/packages/83/91/056ab3166c5e562eab66477f573aff02bb4b92ba0de8affffd1bace6e50c/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2fe8f6d43697719abf822f9f02df7547681669c092b41bcee2b3a689f99e1588", size = 2435801, upload-time = "2025-05-31T23:19:07.617Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5f/b7a6a2106ba508e20f9788bb53c71b56211defd3729c7bcfe6ec09d36fd1/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a2aa298a78135d993e9257f110ac140e008d7bdc11eb23d8bc1c02493afbdf5a", size = 2869804, upload-time = "2025-05-31T23:19:11.059Z" }, + { url = "https://files.pythonhosted.org/packages/f6/e3/d95fbff21b27b06faa892c65621ea429391d0bfb926cdeb557db2d452a33/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:508688336dfc4667f8571115924857ae4629044ebeb4d3dedeabc33e287b2bca", size = 2797201, upload-time = "2025-05-31T23:19:14.674Z" }, + { url = "https://files.pythonhosted.org/packages/45/07/7ab65b01110e9459db2f2d37972826aa31a367ee98e95c7664f0eb13963d/arro3_core-0.5.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31463bda8a942f5ae0e4a06c8fbe2424367b820d93f6f3b82c6f775f9a966780", size = 2709306, upload-time = "2025-05-31T23:19:17.913Z" }, + { url = "https://files.pythonhosted.org/packages/a7/15/0bebe279425bb70bd0a712dd45dcb4418deb9f32057ff5b9efd7947a65d3/arro3_core-0.5.1-cp311-cp311-win_amd64.whl", hash = "sha256:0223d878f5f23c17600cab853cecce963c38fe365efa5f157f016706314018f1", size = 2611539, upload-time = "2025-05-31T23:19:21.358Z" }, + { url = "https://files.pythonhosted.org/packages/c9/9c/af3c6127548630beaa319746770265b2fb996bb3e6dba8d16f78910bc070/arro3_core-0.5.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:afccbaf951a84d6eafb4384692ea557ad06887c6db8825e9417647f805735936", size = 2438592, upload-time = "2025-05-31T23:19:24.494Z" }, + { url = "https://files.pythonhosted.org/packages/d8/50/057c93a846bbc5e5e55a976ea4fc00255332f64e5f9b1abfc218bb184f48/arro3_core-0.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:37325ec2f47a4dce40fa871935000708b545f3981c8e2bde7d7a031f2e098865", size = 2145488, upload-time = "2025-05-31T23:19:27.886Z" }, + { url = "https://files.pythonhosted.org/packages/1f/8c/cbb785ecb9a0df254f5a761fc5ac7c8c5a6f93b0116e994ecf2797984f80/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:63ac803d46127d8c01bc4ffbb5911f10e51c063c9bcc76ba0258378bda683383", size = 2592145, upload-time = "2025-05-31T23:19:31.499Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ee/405d2bdb88a97f03fb64f2cb655274f58439f8aa6e3cf9d2034581899edb/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:dbaf8ccce7637631ed5dc7d53b58aaa0f8c7e935b772ff10a31c9cee571b799a", size = 2637122, upload-time = "2025-05-31T23:19:34.918Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0c/9f611398d63b686ea990d6dcf88a98ec7bc66a78d12c27829f80bf8696bc/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b76a47e326311dea3b8ff302ec0d2741d85a7736be472e39314a87569e4552c", size = 2876345, upload-time = "2025-05-31T23:19:38.3Z" }, + { url = "https://files.pythonhosted.org/packages/d8/57/1bc7bd889c65d190a7ce609a720b16f0280e84da87f1c408c34fd099ecaf/arro3_core-0.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4331989dbbeb6c47db6ee4502df940e90a04b1066bc4d044b9f3c273eb5a0aeb", size = 2537679, upload-time = "2025-05-31T23:19:42.33Z" }, + { url = "https://files.pythonhosted.org/packages/ef/29/9a692f67bdcf2bb07a2635d526ee8751db0676fdb6074d2eee64918ec7db/arro3_core-0.5.1-cp312-cp312-manylinux_2_24_aarch64.whl", hash = "sha256:b9ec0d1e4ffe4cc831dfe67fcb0ca9b263743ca56f47bd433ee53af1993687a9", size = 2286635, upload-time = "2025-05-31T23:19:45.716Z" }, + { url = "https://files.pythonhosted.org/packages/2f/23/b37f5eb6db22d02e0c23b502e4d29d85cb8483706feb76a1a2b5b33498f7/arro3_core-0.5.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ce7009d5b24c21b641d9e254d81ff7e3f89f8bc20100d4f56e36211ccc72a897", size = 2722295, upload-time = "2025-05-31T23:19:49.461Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bf/7cc411fbbf78049c0c3395c5757f51df569dee1f20d212a9822ead974315/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:924ce175770c6dc351ff9482a716de6d54afff817d340bea1902f60c9c8edec8", size = 2431978, upload-time = "2025-05-31T23:19:52.724Z" }, + { url = "https://files.pythonhosted.org/packages/73/0a/52d132ca671739f6c82529a4cc75d4872b9ca0a52dce4b8e7c930af5adf1/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:aa6a8c4bd99f846fcd7f593f1177c0cf248f6c447c9f612cf336b6426ab53429", size = 2866622, upload-time = "2025-05-31T23:19:56.917Z" }, + { url = "https://files.pythonhosted.org/packages/df/7d/925aa386f08a4f0e6b1f54625c8b7536fb3c6f1335377c553fc16b330e75/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7cbec3c4422fbfc41efb68def5020377134ff64cda9ce6f302ed93a600765906", size = 2793027, upload-time = "2025-05-31T23:20:00.373Z" }, + { url = "https://files.pythonhosted.org/packages/4e/0c/dd4a90153fefa49829b20358c6f23b7d33c2613b6e05f2956e4775a9e0a1/arro3_core-0.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3485b3f00366407960f30b4b5032fdfeea87cdb103b493c38ad0592534ba9f0b", size = 2706493, upload-time = "2025-05-31T23:20:03.829Z" }, + { url = "https://files.pythonhosted.org/packages/45/54/f6aafaef5388fe260e4bc02d00442e8bf2f9966637b2ddcb1661d8366c59/arro3_core-0.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:36ccfc7316f5aa534ee0d647720932bf6c18546e55034dadea625a9bb84c9baf", size = 2612703, upload-time = "2025-05-31T23:20:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3f/52336dca7f4784b778d458f7071e5746db33825cb57509fd35196522e5df/arro3_core-0.5.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:bb7fba3c4324db78615b5440ac51f46022ce7674489d96f8916491c117102e47", size = 2438140, upload-time = "2025-05-31T23:20:11.45Z" }, + { url = "https://files.pythonhosted.org/packages/b0/02/32d2c8fa81b33e587b9b6be0a71a0e46523f50f1b20d1903b0fb3f1d9cad/arro3_core-0.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:39bad825cb042f22be5f5ab019844541398a3393d154e3675013b4ebb825b3b9", size = 2145410, upload-time = "2025-05-31T23:20:14.919Z" }, + { url = "https://files.pythonhosted.org/packages/d1/72/4632d4240f2d10de16050314263932c80a7bfabab22688e3dcdc1505a0d6/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6520a6cc6e22fe2f8064dc8e4f93961e05fb9a486c921f71a5ef49843c27d24", size = 2591203, upload-time = "2025-05-31T23:20:18.219Z" }, + { url = "https://files.pythonhosted.org/packages/03/c6/8fd3fcf7a1ccfaeb62827457785293a5ad1a8bf44623903d7e5d99212cb5/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:63e9e96c034177721b8d5af36d4deff3e93411a24b009b4565e08711cddbbc75", size = 2636665, upload-time = "2025-05-31T23:20:21.805Z" }, + { url = "https://files.pythonhosted.org/packages/6a/84/f5df7ed0eeb1fdaa3cd4d19fb829dca791c3b5108e5f5350a50ff34da914/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:639083eb1712bd39540761a25ab786ba9cb51e0710bb77b21499a2914ba076d4", size = 2883496, upload-time = "2025-05-31T23:20:25.701Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e1/6ab0dd6f362f95ef855d2ba7aacf55c9dd08c55a3d8c5339eafa20f3e0f3/arro3_core-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4876a3c34bd54d970c498e2f61bfb7e36306934fd6acbfa5de497f093972bf0", size = 2536753, upload-time = "2025-05-31T23:20:29.237Z" }, + { url = "https://files.pythonhosted.org/packages/53/20/b0d9bd9b6ccac1c53abb29961046364fb1fba84e9ebd3726ff996bb07b53/arro3_core-0.5.1-cp313-cp313-manylinux_2_24_aarch64.whl", hash = "sha256:a4b93fcc5464bd2b638402b56032a1d3cecb78d668d0aa1035d2ee7ee7487abb", size = 2286389, upload-time = "2025-05-31T23:20:32.66Z" }, + { url = "https://files.pythonhosted.org/packages/49/21/8338d0a2ede9128dc46f44601b584ec3544f9ee2d43c841307d563e8cdfa/arro3_core-0.5.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8effd284a02b2a685736eb0365528842992a770a3bf544ece4ccc0ed9a7bf703", size = 2721899, upload-time = "2025-05-31T23:20:36.269Z" }, + { url = "https://files.pythonhosted.org/packages/67/96/f90db955ed8b8d422d09b15e3b1f759a02e4700021f2e4ac68dd5cedca51/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cbc512e90647176528ea09ac18a5d27a47a0ac05755b7924ffcb89923dbf6e38", size = 2431834, upload-time = "2025-05-31T23:20:40.269Z" }, + { url = "https://files.pythonhosted.org/packages/88/f3/c58d9769d46b13f6d51ff5998885396ef224eb384a0ebda236ef26a833a7/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:d4d0141a6b7f5744750cc4066f564cfd509df6857704a2a9a29946a7c2f08f2b", size = 2866047, upload-time = "2025-05-31T23:20:43.72Z" }, + { url = "https://files.pythonhosted.org/packages/7c/7a/af901793fa426e8b86194654820c3612001b165b25f3bd7adde8d9e7bef4/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8c14b496f93906125baccef75703f0ea1c91608c201296bc21a1e916e5eb42c", size = 2792693, upload-time = "2025-05-31T23:20:47.071Z" }, + { url = "https://files.pythonhosted.org/packages/2e/97/651eb8358d64d2bf5353db3d31ae6cb06529a07d2be699aa6a27434c6811/arro3_core-0.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:40e9db9564f22286310c5304884468b98d4eeb628f71c22f27d527e4219ae247", size = 2706150, upload-time = "2025-05-31T23:20:51.012Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/af/0d591453490941e7cd2524ccac0398824eabafa745d0a25a758b1de2e361/arro3_core-0.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:bb0b13975c5394cb6a9887495aaf06cad8993893f99911c8aa2b827cd55dd6a8", size = 2612300, upload-time = "2025-05-31T23:20:54.249Z" }, + { url = "https://files.pythonhosted.org/packages/74/5c/c7135425c172d7fbc94c47ab48d46431d52df5b5f888bc140f7b2b710037/arro3_core-0.5.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f778d41f63cadb1b9e6bce3446e2758db271bc9b81878617232729053c7520fc", size = 2447436, upload-time = "2025-05-31T23:21:45.231Z" }, + { url = "https://files.pythonhosted.org/packages/5e/2c/b7f94e70101abaafa78a36445fdeadfc4461535a0acf55cd9c20bdc7e2b3/arro3_core-0.5.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:4df0b8594518bec2602d1b289dbabf22b9b0b63affc90ff0d6107990208c5e67", size = 2154852, upload-time = "2025-05-31T23:21:48.708Z" }, + { url = "https://files.pythonhosted.org/packages/7d/05/020b1cc1449755d35ba91d814d047fa20d18b9fb577a9fe9b87c72a1a217/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1165f2973c7472e564cef53505cc55852991733f00991b42d011d0f76c4c4c4a", size = 2593644, upload-time = "2025-05-31T23:21:52.812Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/5160d6adaad3a1db443ff5409353ec4df82d2068a8ed9b8e738036325c3c/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:abafcb7f2fe892700e5821b5158c98fad772a2c7412c9b35e4174ed919e24ed4", size = 2635380, upload-time = "2025-05-31T23:21:56.684Z" }, + { url = "https://files.pythonhosted.org/packages/53/21/4aa439cc2b597e0de66aef03f0f509afe206547b0794ce0ba004134fe716/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:93120f0de07f2cac507219e74ef25a95a10fc5ec5a2d51c2fd117db2929220df", size = 2867549, upload-time = "2025-05-31T23:22:00.93Z" }, + { url = "https://files.pythonhosted.org/packages/5c/01/1338fff3c27366cd9ffc444c96aa74bfea3dc8ebb9dea4ee33346d74bccd/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:657896fc1e50e39d6ad9261f15cca103f26a7213dc30a6300dbcec6c5cc5a72d", size = 2539421, upload-time = "2025-05-31T23:22:04.631Z" }, + { url = "https://files.pythonhosted.org/packages/bc/78/3660ee1f71074a5195ae96c0cc9b58464c588705a5a93cc26b4f23a51cac/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_24_aarch64.whl", hash = "sha256:a8a6df4af193898b6e09902ba76a9c0c8699efaf91b3cff87d5f49cc97e04544", size = 2289147, upload-time = "2025-05-31T23:22:08.53Z" }, + { url = "https://files.pythonhosted.org/packages/85/cb/37d165bdb1633249e2e987d52d00308f790b4d24121b2a0a2a7817e1f8bb/arro3_core-0.5.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d3faf92e45b479cd5556370db1c8895f153d9f59c52fdbd85af751838c8b218", size = 2723645, upload-time = "2025-05-31T23:22:12.604Z" }, + { url = "https://files.pythonhosted.org/packages/40/18/3edf9949cc09f9545e06abe8fd2b92eff71e86f8927062a3ab8cb1320377/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:07e358e8ea9c7b8fa38af79d0942b1e3174123541584370e9020394101d4198a", size = 2434306, upload-time = "2025-05-31T23:22:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/2e/98a874f5f3b3baf911d8b87151b6654ac161ccb09ebb2cf621ac4da2edc3/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = 
"sha256:70cfb884cfb465f4c0143a38e172a6de4a904afe884bd6773a89c4c6659c41e7", size = 2868790, upload-time = "2025-05-31T23:22:20.536Z" }, + { url = "https://files.pythonhosted.org/packages/1a/4c/0f7aa37d3374a82fa084517ac353378fc397685422ee1eac8884044cd487/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:9f47326af6c10cec993cee9cbcc4e554dc0c06269e2ba6f83c68235ae13ee98c", size = 2796671, upload-time = "2025-05-31T23:22:24.62Z" }, + { url = "https://files.pythonhosted.org/packages/0d/90/1c0714e2c1af68229e8d49c53a861399654b26152a19306927e48740dbd1/arro3_core-0.5.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:5832859f53eb82c67bda2a655d466fb8520d096166df4ee9b0b17df748cbacb1", size = 2708649, upload-time = "2025-05-31T23:22:28.719Z" }, +] + [[package]] name = "asttokens" version = "3.0.0" @@ -33,6 +117,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fe/ba/e2081de779ca30d473f21f5b30e0e737c438205440784c7dfc81efc2b029/async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c", size = 6233, upload-time = "2024-11-06T16:41:37.9Z" }, ] +[[package]] +name = "cachetools" +version = "5.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/81/3747dad6b14fa2cf53fcf10548cf5aea6913e96fab41a3c198676f8948a5/cachetools-5.5.2.tar.gz", hash = "sha256:1a661caa9175d26759571b2e19580f9d6393969e5dfca11fdb1f947a23e640d4", size = 28380, upload-time = "2025-02-20T21:01:19.524Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, +] + [[package]] name = "certifi" version = "2025.4.26" @@ -160,6 +253,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, ] +[[package]] +name = "click" +version = "8.2.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -369,6 +474,36 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] +[[package]] +name = "deltalake" +version = "1.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "arro3-core" }, + { name 
= "deprecated" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1e/c3/19cd8457243c41aa60562d28b66271ff958d896e3fd9373816d8fd781f1a/deltalake-1.0.2.tar.gz", hash = "sha256:fbe4cccde0af14c6e30b62cc3dd09e9a46777e8fd8e375ec809a6bf4edea756c", size = 5076074, upload-time = "2025-06-02T11:08:14.063Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/74/043f52f50cbda7f651d39465fb7c5a9e8880e9a332abbb4f64c4d0522306/deltalake-1.0.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:e4f24cdbadaf8a4c32ae535a44b89d8bcafd5cb97897de33a4ec8609058a7d50", size = 41649942, upload-time = "2025-06-02T11:08:17.754Z" }, + { url = "https://files.pythonhosted.org/packages/f9/99/ced0f538deacdf0f1e78e28a14c30420d8df1c7d9ca30ff8f71a03a008a7/deltalake-1.0.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:43731c48657c16c1728c90270e5e7ae1f3fa1a5b6fb0cb0b55c88c5c8f23cc3f", size = 38590012, upload-time = "2025-06-02T11:09:07.48Z" }, + { url = "https://files.pythonhosted.org/packages/6f/f1/feee0df833eed13a27aafeedfac313c0b6bf7b0d712fa5892b1099a7a752/deltalake-1.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c921b47e4810a346650141dae30abc69564e57f26e00cce256f1837dd9c4b5fd", size = 40281750, upload-time = "2025-06-02T11:08:52.532Z" }, + { url = "https://files.pythonhosted.org/packages/dc/6f/4707d7511bd172f6c6504e87ea0bc43cdf7b5a4c85340ff61cee83170e37/deltalake-1.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59a3b403e5871d12920798d27f2b1e4b70f4e975381841066cb6733ccbc80071", size = 51273870, upload-time = "2025-06-02T11:08:10.194Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2a/1dfc1f337f85d62141b4e70923b923d5faccbe666d4253b670c6d506d1bb/deltalake-1.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:67d3224ce7e569bbb6d5181f9ed2530b237a1cdc87f413e5ff0bc1227aab50d5", size = 40293966, upload-time = "2025-06-02T11:08:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/78/a9/9014b804f947a505c21a6c0cbc87e2673cacb6cd82ac70be9a60f26a836b/deltalake-1.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:7a1606f535416d4a38ce554019f9fcad194aaec33d638328662b2de46af03059", size = 42567914, upload-time = "2025-06-02T11:23:49.313Z" }, +] + +[[package]] +name = "deprecated" +version = "1.2.18" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wrapt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, +] + [[package]] name = "exceptiongroup" version = "1.3.0" @@ -431,6 +566,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/21/ff/995277586691c0cc314c28b24b4ec30610440fd7bf580072aed1409f95b0/fonttools-4.58.1-py3-none-any.whl", hash = "sha256:db88365d0962cd6f5bce54b190a4669aeed9c9941aa7bd60a5af084d8d9173d6", size = 1113429, upload-time = "2025-05-28T15:29:24.185Z" }, ] +[[package]] +name = "fsspec" +version = "2025.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033, upload-time = "2025-05-24T12:03:23.792Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052, upload-time = "2025-05-24T12:03:21.66Z" }, +] + [[package]] name = "httpie" version = "3.2.4" @@ -525,7 +669,8 @@ name = "ipython" version = "9.2.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", ] dependencies = [ { name = "colorama", marker = "python_full_version >= '3.11' and sys_platform == 'win32'" }, @@ -771,6 +916,78 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "mmh3" +version = "5.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/1b/1fc6888c74cbd8abad1292dde2ddfcf8fc059e114c97dd6bf16d12f36293/mmh3-5.1.0.tar.gz", hash = "sha256:136e1e670500f177f49ec106a4ebf0adf20d18d96990cc36ea492c651d2b406c", size = 33728, upload-time = "2025-01-25T08:39:43.386Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/01/9d06468928661765c0fc248a29580c760a4a53a9c6c52cf72528bae3582e/mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec", size = 56095, upload-time = "2025-01-25T08:37:53.621Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/7b39307fc9db867b2a9a20c58b0de33b778dd6c55e116af8ea031f1433ba/mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a", size = 40512, upload-time = "2025-01-25T08:37:54.972Z" }, + { url = "https://files.pythonhosted.org/packages/4f/85/728ca68280d8ccc60c113ad119df70ff1748fbd44c89911fed0501faf0b8/mmh3-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4ba8cac21e1f2d4e436ce03a82a7f87cda80378691f760e9ea55045ec480a3d", size = 40110, upload-time = "2025-01-25T08:37:57.86Z" }, + { url = "https://files.pythonhosted.org/packages/e4/96/beaf0e301472ffa00358bbbf771fe2d9c4d709a2fe30b1d929e569f8cbdf/mmh3-5.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69281c281cb01994f054d862a6bb02a2e7acfe64917795c58934b0872b9ece4", size = 100151, upload-time = "2025-01-25T08:37:59.609Z" }, + { url = "https://files.pythonhosted.org/packages/c3/ee/9381f825c4e09ffafeffa213c3865c4bf7d39771640de33ab16f6faeb854/mmh3-5.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d05ed3962312fbda2a1589b97359d2467f677166952f6bd410d8c916a55febf", size = 106312, upload-time = "2025-01-25T08:38:02.102Z" }, + { url = "https://files.pythonhosted.org/packages/67/dc/350a54bea5cf397d357534198ab8119cfd0d8e8bad623b520f9c290af985/mmh3-5.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78ae6a03f4cff4aa92ddd690611168856f8c33a141bd3e5a1e0a85521dc21ea0", size = 
104232, upload-time = "2025-01-25T08:38:03.852Z" }, + { url = "https://files.pythonhosted.org/packages/b2/5d/2c6eb4a4ec2f7293b98a9c07cb8c64668330b46ff2b6511244339e69a7af/mmh3-5.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95f983535b39795d9fb7336438faae117424c6798f763d67c6624f6caf2c4c01", size = 91663, upload-time = "2025-01-25T08:38:06.24Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ac/17030d24196f73ecbab8b5033591e5e0e2beca103181a843a135c78f4fee/mmh3-5.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d46fdd80d4c7ecadd9faa6181e92ccc6fe91c50991c9af0e371fdf8b8a7a6150", size = 99166, upload-time = "2025-01-25T08:38:07.988Z" }, + { url = "https://files.pythonhosted.org/packages/b9/ed/54ddc56603561a10b33da9b12e95a48a271d126f4a4951841bbd13145ebf/mmh3-5.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0f16e976af7365ea3b5c425124b2a7f0147eed97fdbb36d99857f173c8d8e096", size = 101555, upload-time = "2025-01-25T08:38:09.821Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c3/33fb3a940c9b70908a5cc9fcc26534aff8698180f9f63ab6b7cc74da8bcd/mmh3-5.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6fa97f7d1e1f74ad1565127229d510f3fd65d931fdedd707c1e15100bc9e5ebb", size = 94813, upload-time = "2025-01-25T08:38:11.682Z" }, + { url = "https://files.pythonhosted.org/packages/61/88/c9ff76a23abe34db8eee1a6fa4e449462a16c7eb547546fc5594b0860a72/mmh3-5.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4052fa4a8561bd62648e9eb993c8f3af3bdedadf3d9687aa4770d10e3709a80c", size = 109611, upload-time = "2025-01-25T08:38:12.602Z" }, + { url = "https://files.pythonhosted.org/packages/0b/8e/27d04f40e95554ebe782cac7bddda2d158cf3862387298c9c7b254fa7beb/mmh3-5.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3f0e8ae9f961037f812afe3cce7da57abf734285961fffbeff9a4c011b737732", size = 100515, upload-time = "2025-01-25T08:38:16.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/00/504ca8f462f01048f3c87cd93f2e1f60b93dac2f930cd4ed73532a9337f5/mmh3-5.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:99297f207db967814f1f02135bb7fe7628b9eacb046134a34e1015b26b06edce", size = 100177, upload-time = "2025-01-25T08:38:18.186Z" }, + { url = "https://files.pythonhosted.org/packages/6f/1d/2efc3525fe6fdf8865972fcbb884bd1f4b0f923c19b80891cecf7e239fa5/mmh3-5.1.0-cp310-cp310-win32.whl", hash = "sha256:2e6c8dc3631a5e22007fbdb55e993b2dbce7985c14b25b572dd78403c2e79182", size = 40815, upload-time = "2025-01-25T08:38:19.176Z" }, + { url = "https://files.pythonhosted.org/packages/38/b5/c8fbe707cb0fea77a6d2d58d497bc9b67aff80deb84d20feb34d8fdd8671/mmh3-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:e4e8c7ad5a4dddcfde35fd28ef96744c1ee0f9d9570108aa5f7e77cf9cfdf0bf", size = 41479, upload-time = "2025-01-25T08:38:21.098Z" }, + { url = "https://files.pythonhosted.org/packages/a1/f1/663e16134f913fccfbcea5b300fb7dc1860d8f63dc71867b013eebc10aec/mmh3-5.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:45da549269883208912868a07d0364e1418d8292c4259ca11699ba1b2475bd26", size = 38883, upload-time = "2025-01-25T08:38:22.013Z" }, + { url = "https://files.pythonhosted.org/packages/56/09/fda7af7fe65928262098382e3bf55950cfbf67d30bf9e47731bf862161e9/mmh3-5.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0b529dcda3f951ff363a51d5866bc6d63cf57f1e73e8961f864ae5010647079d", size = 56098, upload-time = "2025-01-25T08:38:22.917Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/ab/84c7bc3f366d6f3bd8b5d9325a10c367685bc17c26dac4c068e2001a4671/mmh3-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db1079b3ace965e562cdfc95847312f9273eb2ad3ebea983435c8423e06acd7", size = 40513, upload-time = "2025-01-25T08:38:25.079Z" }, + { url = "https://files.pythonhosted.org/packages/4f/21/25ea58ca4a652bdc83d1528bec31745cce35802381fb4fe3c097905462d2/mmh3-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22d31e3a0ff89b8eb3b826d6fc8e19532998b2aa6b9143698043a1268da413e1", size = 40112, upload-time = "2025-01-25T08:38:25.947Z" }, + { url = "https://files.pythonhosted.org/packages/bd/78/4f12f16ae074ddda6f06745254fdb50f8cf3c85b0bbf7eaca58bed84bf58/mmh3-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2139bfbd354cd6cb0afed51c4b504f29bcd687a3b1460b7e89498329cc28a894", size = 102632, upload-time = "2025-01-25T08:38:26.939Z" }, + { url = "https://files.pythonhosted.org/packages/48/11/8f09dc999cf2a09b6138d8d7fc734efb7b7bfdd9adb9383380941caadff0/mmh3-5.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c8105c6a435bc2cd6ea2ef59558ab1a2976fd4a4437026f562856d08996673a", size = 108884, upload-time = "2025-01-25T08:38:29.159Z" }, + { url = "https://files.pythonhosted.org/packages/bd/91/e59a66538a3364176f6c3f7620eee0ab195bfe26f89a95cbcc7a1fb04b28/mmh3-5.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57730067174a7f36fcd6ce012fe359bd5510fdaa5fe067bc94ed03e65dafb769", size = 106835, upload-time = "2025-01-25T08:38:33.04Z" }, + { url = "https://files.pythonhosted.org/packages/25/14/b85836e21ab90e5cddb85fe79c494ebd8f81d96a87a664c488cc9277668b/mmh3-5.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bde80eb196d7fdc765a318604ded74a4378f02c5b46c17aa48a27d742edaded2", size = 93688, upload-time = "2025-01-25T08:38:34.987Z" }, + { url = "https://files.pythonhosted.org/packages/ac/aa/8bc964067df9262740c95e4cde2d19f149f2224f426654e14199a9e47df6/mmh3-5.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9c8eddcb441abddeb419c16c56fd74b3e2df9e57f7aa2903221996718435c7a", size = 101569, upload-time = "2025-01-25T08:38:35.983Z" }, + { url = "https://files.pythonhosted.org/packages/70/b6/1fb163cbf919046a64717466c00edabebece3f95c013853fec76dbf2df92/mmh3-5.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:99e07e4acafbccc7a28c076a847fb060ffc1406036bc2005acb1b2af620e53c3", size = 98483, upload-time = "2025-01-25T08:38:38.198Z" }, + { url = "https://files.pythonhosted.org/packages/70/49/ba64c050dd646060f835f1db6b2cd60a6485f3b0ea04976e7a29ace7312e/mmh3-5.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e25ba5b530e9a7d65f41a08d48f4b3fedc1e89c26486361166a5544aa4cad33", size = 96496, upload-time = "2025-01-25T08:38:39.257Z" }, + { url = "https://files.pythonhosted.org/packages/9e/07/f2751d6a0b535bb865e1066e9c6b80852571ef8d61bce7eb44c18720fbfc/mmh3-5.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bb9bf7475b4d99156ce2f0cf277c061a17560c8c10199c910a680869a278ddc7", size = 105109, upload-time = "2025-01-25T08:38:40.395Z" }, + { url = "https://files.pythonhosted.org/packages/b7/02/30360a5a66f7abba44596d747cc1e6fb53136b168eaa335f63454ab7bb79/mmh3-5.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2a1b0878dd281ea3003368ab53ff6f568e175f1b39f281df1da319e58a19c23a", size = 98231, upload-time = "2025-01-25T08:38:42.141Z" 
}, + { url = "https://files.pythonhosted.org/packages/8c/60/8526b0c750ff4d7ae1266e68b795f14b97758a1d9fcc19f6ecabf9c55656/mmh3-5.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:25f565093ac8b8aefe0f61f8f95c9a9d11dd69e6a9e9832ff0d293511bc36258", size = 97548, upload-time = "2025-01-25T08:38:43.402Z" }, + { url = "https://files.pythonhosted.org/packages/6d/4c/26e1222aca65769280d5427a1ce5875ef4213449718c8f03958d0bf91070/mmh3-5.1.0-cp311-cp311-win32.whl", hash = "sha256:1e3554d8792387eac73c99c6eaea0b3f884e7130eb67986e11c403e4f9b6d372", size = 40810, upload-time = "2025-01-25T08:38:45.143Z" }, + { url = "https://files.pythonhosted.org/packages/98/d5/424ba95062d1212ea615dc8debc8d57983f2242d5e6b82e458b89a117a1e/mmh3-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8ad777a48197882492af50bf3098085424993ce850bdda406a358b6ab74be759", size = 41476, upload-time = "2025-01-25T08:38:46.029Z" }, + { url = "https://files.pythonhosted.org/packages/bd/08/0315ccaf087ba55bb19a6dd3b1e8acd491e74ce7f5f9c4aaa06a90d66441/mmh3-5.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f29dc4efd99bdd29fe85ed6c81915b17b2ef2cf853abf7213a48ac6fb3eaabe1", size = 38880, upload-time = "2025-01-25T08:38:47.035Z" }, + { url = "https://files.pythonhosted.org/packages/f4/47/e5f452bdf16028bfd2edb4e2e35d0441e4a4740f30e68ccd4cfd2fb2c57e/mmh3-5.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:45712987367cb9235026e3cbf4334670522a97751abfd00b5bc8bfa022c3311d", size = 56152, upload-time = "2025-01-25T08:38:47.902Z" }, + { url = "https://files.pythonhosted.org/packages/60/38/2132d537dc7a7fdd8d2e98df90186c7fcdbd3f14f95502a24ba443c92245/mmh3-5.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b1020735eb35086ab24affbea59bb9082f7f6a0ad517cb89f0fc14f16cea4dae", size = 40564, upload-time = "2025-01-25T08:38:48.839Z" }, + { url = "https://files.pythonhosted.org/packages/c0/2a/c52cf000581bfb8d94794f58865658e7accf2fa2e90789269d4ae9560b16/mmh3-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:babf2a78ce5513d120c358722a2e3aa7762d6071cd10cede026f8b32452be322", size = 40104, upload-time = "2025-01-25T08:38:49.773Z" }, + { url = "https://files.pythonhosted.org/packages/83/33/30d163ce538c54fc98258db5621447e3ab208d133cece5d2577cf913e708/mmh3-5.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4f47f58cd5cbef968c84a7c1ddc192fef0a36b48b0b8a3cb67354531aa33b00", size = 102634, upload-time = "2025-01-25T08:38:51.5Z" }, + { url = "https://files.pythonhosted.org/packages/94/5c/5a18acb6ecc6852be2d215c3d811aa61d7e425ab6596be940877355d7f3e/mmh3-5.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2044a601c113c981f2c1e14fa33adc9b826c9017034fe193e9eb49a6882dbb06", size = 108888, upload-time = "2025-01-25T08:38:52.542Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/11c556324c64a92aa12f28e221a727b6e082e426dc502e81f77056f6fc98/mmh3-5.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c94d999c9f2eb2da44d7c2826d3fbffdbbbbcde8488d353fee7c848ecc42b968", size = 106968, upload-time = "2025-01-25T08:38:54.286Z" }, + { url = "https://files.pythonhosted.org/packages/5d/61/ca0c196a685aba7808a5c00246f17b988a9c4f55c594ee0a02c273e404f3/mmh3-5.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a015dcb24fa0c7a78f88e9419ac74f5001c1ed6a92e70fd1803f74afb26a4c83", size = 93771, upload-time = "2025-01-25T08:38:55.576Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/55/0927c33528710085ee77b808d85bbbafdb91a1db7c8eaa89cac16d6c513e/mmh3-5.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:457da019c491a2d20e2022c7d4ce723675e4c081d9efc3b4d8b9f28a5ea789bd", size = 101726, upload-time = "2025-01-25T08:38:56.654Z" }, + { url = "https://files.pythonhosted.org/packages/49/39/a92c60329fa470f41c18614a93c6cd88821412a12ee78c71c3f77e1cfc2d/mmh3-5.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:71408579a570193a4ac9c77344d68ddefa440b00468a0b566dcc2ba282a9c559", size = 98523, upload-time = "2025-01-25T08:38:57.662Z" }, + { url = "https://files.pythonhosted.org/packages/81/90/26adb15345af8d9cf433ae1b6adcf12e0a4cad1e692de4fa9f8e8536c5ae/mmh3-5.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8b3a04bc214a6e16c81f02f855e285c6df274a2084787eeafaa45f2fbdef1b63", size = 96628, upload-time = "2025-01-25T08:38:59.505Z" }, + { url = "https://files.pythonhosted.org/packages/8a/4d/340d1e340df972a13fd4ec84c787367f425371720a1044220869c82364e9/mmh3-5.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:832dae26a35514f6d3c1e267fa48e8de3c7b978afdafa0529c808ad72e13ada3", size = 105190, upload-time = "2025-01-25T08:39:00.483Z" }, + { url = "https://files.pythonhosted.org/packages/d3/7c/65047d1cccd3782d809936db446430fc7758bda9def5b0979887e08302a2/mmh3-5.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bf658a61fc92ef8a48945ebb1076ef4ad74269e353fffcb642dfa0890b13673b", size = 98439, upload-time = "2025-01-25T08:39:01.484Z" }, + { url = "https://files.pythonhosted.org/packages/72/d2/3c259d43097c30f062050f7e861075099404e8886b5d4dd3cebf180d6e02/mmh3-5.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3313577453582b03383731b66447cdcdd28a68f78df28f10d275d7d19010c1df", size = 97780, upload-time = "2025-01-25T08:39:02.444Z" }, + { url = "https://files.pythonhosted.org/packages/29/29/831ea8d4abe96cdb3e28b79eab49cac7f04f9c6b6e36bfc686197ddba09d/mmh3-5.1.0-cp312-cp312-win32.whl", hash = "sha256:1d6508504c531ab86c4424b5a5ff07c1132d063863339cf92f6657ff7a580f76", size = 40835, upload-time = "2025-01-25T08:39:03.369Z" }, + { url = "https://files.pythonhosted.org/packages/12/dd/7cbc30153b73f08eeac43804c1dbc770538a01979b4094edbe1a4b8eb551/mmh3-5.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:aa75981fcdf3f21759d94f2c81b6a6e04a49dfbcdad88b152ba49b8e20544776", size = 41509, upload-time = "2025-01-25T08:39:04.284Z" }, + { url = "https://files.pythonhosted.org/packages/80/9d/627375bab4c90dd066093fc2c9a26b86f87e26d980dbf71667b44cbee3eb/mmh3-5.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:a4c1a76808dfea47f7407a0b07aaff9087447ef6280716fd0783409b3088bb3c", size = 38888, upload-time = "2025-01-25T08:39:05.174Z" }, + { url = "https://files.pythonhosted.org/packages/05/06/a098a42870db16c0a54a82c56a5bdc873de3165218cd5b3ca59dbc0d31a7/mmh3-5.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7a523899ca29cfb8a5239618474a435f3d892b22004b91779fcb83504c0d5b8c", size = 56165, upload-time = "2025-01-25T08:39:06.887Z" }, + { url = "https://files.pythonhosted.org/packages/5a/65/eaada79a67fde1f43e1156d9630e2fb70655e1d3f4e8f33d7ffa31eeacfd/mmh3-5.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:17cef2c3a6ca2391ca7171a35ed574b5dab8398163129a3e3a4c05ab85a4ff40", size = 40569, upload-time = "2025-01-25T08:39:07.945Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/7e/2b6c43ed48be583acd68e34d16f19209a9f210e4669421b0321e326d8554/mmh3-5.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:52e12895b30110f3d89dae59a888683cc886ed0472dd2eca77497edef6161997", size = 40104, upload-time = "2025-01-25T08:39:09.598Z" }, + { url = "https://files.pythonhosted.org/packages/11/2b/1f9e962fdde8e41b0f43d22c8ba719588de8952f9376df7d73a434827590/mmh3-5.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d6719045cda75c3f40397fc24ab67b18e0cb8f69d3429ab4c39763c4c608dd", size = 102497, upload-time = "2025-01-25T08:39:10.512Z" }, + { url = "https://files.pythonhosted.org/packages/46/94/d6c5c3465387ba077cccdc028ab3eec0d86eed1eebe60dcf4d15294056be/mmh3-5.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d19fa07d303a91f8858982c37e6939834cb11893cb3ff20e6ee6fa2a7563826a", size = 108834, upload-time = "2025-01-25T08:39:11.568Z" }, + { url = "https://files.pythonhosted.org/packages/34/1e/92c212bb81796b69dddfd50a8a8f4b26ab0d38fdaf1d3e8628a67850543b/mmh3-5.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:31b47a620d622fbde8ca1ca0435c5d25de0ac57ab507209245e918128e38e676", size = 106936, upload-time = "2025-01-25T08:39:12.638Z" }, + { url = "https://files.pythonhosted.org/packages/f4/41/f2f494bbff3aad5ffd2085506255049de76cde51ddac84058e32768acc79/mmh3-5.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00f810647c22c179b6821079f7aa306d51953ac893587ee09cf1afb35adf87cb", size = 93709, upload-time = "2025-01-25T08:39:14.071Z" }, + { url = "https://files.pythonhosted.org/packages/9e/a9/a2cc4a756d73d9edf4fb85c76e16fd56b0300f8120fd760c76b28f457730/mmh3-5.1.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6128b610b577eed1e89ac7177ab0c33d06ade2aba93f5c89306032306b5f1c6", size = 101623, upload-time = "2025-01-25T08:39:15.507Z" }, + { url = "https://files.pythonhosted.org/packages/5e/6f/b9d735533b6a56b2d56333ff89be6a55ac08ba7ff33465feb131992e33eb/mmh3-5.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1e550a45d2ff87a1c11b42015107f1778c93f4c6f8e731bf1b8fa770321b8cc4", size = 98521, upload-time = "2025-01-25T08:39:16.77Z" }, + { url = "https://files.pythonhosted.org/packages/99/47/dff2b54fac0d421c1e6ecbd2d9c85b2d0e6f6ee0d10b115d9364116a511e/mmh3-5.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:785ae09276342f79fd8092633e2d52c0f7c44d56e8cfda8274ccc9b76612dba2", size = 96696, upload-time = "2025-01-25T08:39:17.805Z" }, + { url = "https://files.pythonhosted.org/packages/be/43/9e205310f47c43ddf1575bb3a1769c36688f30f1ac105e0f0c878a29d2cd/mmh3-5.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0f4be3703a867ef976434afd3661a33884abe73ceb4ee436cac49d3b4c2aaa7b", size = 105234, upload-time = "2025-01-25T08:39:18.908Z" }, + { url = "https://files.pythonhosted.org/packages/6b/44/90b11fd2b67dcb513f5bfe9b476eb6ca2d5a221c79b49884dc859100905e/mmh3-5.1.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e513983830c4ff1f205ab97152a0050cf7164f1b4783d702256d39c637b9d107", size = 98449, upload-time = "2025-01-25T08:39:20.719Z" }, + { url = "https://files.pythonhosted.org/packages/f0/d0/25c4b0c7b8e49836541059b28e034a4cccd0936202800d43a1cc48495ecb/mmh3-5.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9135c300535c828c0bae311b659f33a31c941572eae278568d1a953c4a57b59", size = 97796, upload-time = 
"2025-01-25T08:39:22.453Z" }, + { url = "https://files.pythonhosted.org/packages/23/fa/cbbb7fcd0e287a715f1cd28a10de94c0535bd94164e38b852abc18da28c6/mmh3-5.1.0-cp313-cp313-win32.whl", hash = "sha256:c65dbd12885a5598b70140d24de5839551af5a99b29f9804bb2484b29ef07692", size = 40828, upload-time = "2025-01-25T08:39:23.372Z" }, + { url = "https://files.pythonhosted.org/packages/09/33/9fb90ef822f7b734955a63851907cf72f8a3f9d8eb3c5706bfa6772a2a77/mmh3-5.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:10db7765201fc65003fa998faa067417ef6283eb5f9bba8f323c48fd9c33e91f", size = 41504, upload-time = "2025-01-25T08:39:24.286Z" }, + { url = "https://files.pythonhosted.org/packages/16/71/4ad9a42f2772793a03cb698f0fc42499f04e6e8d2560ba2f7da0fb059a8e/mmh3-5.1.0-cp313-cp313-win_arm64.whl", hash = "sha256:b22fe2e54be81f6c07dcb36b96fa250fb72effe08aa52fbb83eade6e1e2d5fd7", size = 38890, upload-time = "2025-01-25T08:39:25.28Z" }, +] + [[package]] name = "multidict" version = "6.4.4" @@ -894,7 +1111,8 @@ name = "networkx" version = "3.5" source = { registry = "https://pypi.org/simple" } resolution-markers = [ - "python_full_version >= '3.11'", + "python_full_version >= '3.12'", + "python_full_version == '3.11.*'", ] sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065, upload-time = "2025-05-29T11:35:07.804Z" } wheels = [ @@ -970,6 +1188,10 @@ dependencies = [ { name = "matplotlib" }, { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pandas" }, + { name = "polars" }, + { name = "pyarrow" }, + { name = "pyyaml" }, { name = "typing-extensions" }, { name = "xxhash" }, ] @@ -981,18 +1203,25 @@ redis = [ [package.dev-dependencies] dev = [ + { name = "deltalake" }, { name = "httpie" }, { name = "ipykernel" }, + { name = "pyiceberg" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "redis" }, { name = "ruff" }, + { name = "tqdm" }, ] [package.metadata] requires-dist = [ { name = "matplotlib", specifier = ">=3.10.3" }, { name = "networkx" }, + { name = "pandas", specifier = ">=2.2.3" }, + { name = "polars", specifier = ">=1.30.0" }, + { name = "pyarrow", specifier = ">=20.0.0" }, + { name = "pyyaml", specifier = ">=6.0.2" }, { name = "redis", marker = "extra == 'redis'", specifier = ">=6.2.0" }, { name = "typing-extensions" }, { name = "xxhash" }, @@ -1001,12 +1230,15 @@ provides-extras = ["redis"] [package.metadata.requires-dev] dev = [ + { name = "deltalake", specifier = ">=1.0.2" }, { name = "httpie", specifier = ">=3.2.4" }, { name = "ipykernel", specifier = ">=6.29.5" }, + { name = "pyiceberg", specifier = ">=0.9.1" }, { name = "pytest", specifier = ">=8.3.5" }, { name = "pytest-cov", specifier = ">=6.1.1" }, { name = "redis", specifier = ">=6.2.0" }, { name = "ruff", specifier = ">=0.11.11" }, + { name = "tqdm", specifier = ">=4.67.1" }, ] [[package]] @@ -1018,6 +1250,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, ] +[[package]] +name = "pandas" 
+version = "2.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "python-dateutil" }, + { name = "pytz" }, + { name = "tzdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/aa/70/c853aec59839bceed032d52010ff5f1b8d87dc3114b762e4ba2727661a3b/pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5", size = 12580827, upload-time = "2024-09-20T13:08:42.347Z" }, + { url = "https://files.pythonhosted.org/packages/99/f2/c4527768739ffa4469b2b4fff05aa3768a478aed89a2f271a79a40eee984/pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348", size = 11303897, upload-time = "2024-09-20T13:08:45.807Z" }, + { url = "https://files.pythonhosted.org/packages/ed/12/86c1747ea27989d7a4064f806ce2bae2c6d575b950be087837bdfcabacc9/pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed", size = 66480908, upload-time = "2024-09-20T18:37:13.513Z" }, + { url = "https://files.pythonhosted.org/packages/44/50/7db2cd5e6373ae796f0ddad3675268c8d59fb6076e66f0c339d61cea886b/pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57", size = 13064210, upload-time = "2024-09-20T13:08:48.325Z" }, + { url = "https://files.pythonhosted.org/packages/61/61/a89015a6d5536cb0d6c3ba02cebed51a95538cf83472975275e28ebf7d0c/pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42", size = 16754292, upload-time = "2024-09-20T19:01:54.443Z" }, + { url = "https://files.pythonhosted.org/packages/ce/0d/4cc7b69ce37fac07645a94e1d4b0880b15999494372c1523508511b09e40/pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f", size = 14416379, upload-time = "2024-09-20T13:08:50.882Z" }, + { url = "https://files.pythonhosted.org/packages/31/9e/6ebb433de864a6cd45716af52a4d7a8c3c9aaf3a98368e61db9e69e69a9c/pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645", size = 11598471, upload-time = "2024-09-20T13:08:53.332Z" }, + { url = "https://files.pythonhosted.org/packages/a8/44/d9502bf0ed197ba9bf1103c9867d5904ddcaf869e52329787fc54ed70cc8/pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039", size = 12602222, upload-time = "2024-09-20T13:08:56.254Z" }, + { url = "https://files.pythonhosted.org/packages/52/11/9eac327a38834f162b8250aab32a6781339c69afe7574368fffe46387edf/pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd", size = 11321274, upload-time = "2024-09-20T13:08:58.645Z" }, + { url = "https://files.pythonhosted.org/packages/45/fb/c4beeb084718598ba19aa9f5abbc8aed8b42f90930da861fcb1acdb54c3a/pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698", size = 15579836, upload-time = "2024-09-20T19:01:57.571Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5f/4dba1d39bb9c38d574a9a22548c540177f78ea47b32f99c0ff2ec499fac5/pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc", size = 13058505, upload-time = "2024-09-20T13:09:01.501Z" }, + { url = "https://files.pythonhosted.org/packages/b9/57/708135b90391995361636634df1f1130d03ba456e95bcf576fada459115a/pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3", size = 16744420, upload-time = "2024-09-20T19:02:00.678Z" }, + { url = "https://files.pythonhosted.org/packages/86/4a/03ed6b7ee323cf30404265c284cee9c65c56a212e0a08d9ee06984ba2240/pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32", size = 14440457, upload-time = "2024-09-20T13:09:04.105Z" }, + { url = "https://files.pythonhosted.org/packages/ed/8c/87ddf1fcb55d11f9f847e3c69bb1c6f8e46e2f40ab1a2d2abadb2401b007/pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5", size = 11617166, upload-time = "2024-09-20T13:09:06.917Z" }, + { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, + { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, + { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, + { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, + { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, + { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, + { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = 
"sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, + { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, + { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, + { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, + { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, + { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, + { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, + { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, + { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, + { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, + { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, + { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, + { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, +] + [[package]] name = "parso" version = "0.8.4" @@ -1143,6 +1423,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polars" +version = "1.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/82/b6/8dbdf626c0705a57f052708c9fc0860ffc2aa97955930d5faaf6a66fcfd3/polars-1.30.0.tar.gz", hash = "sha256:dfe94ae84a5efd9ba74e616e3e125b24ca155494a931890a8f17480737c4db45", size = 4668318, upload-time = "2025-05-21T13:33:24.175Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/48/e9b2cb379abcc9f7aff2e701098fcdb9fe6d85dc4ad4cec7b35d39c70951/polars-1.30.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:4c33bc97c29b7112f0e689a2f8a33143973a3ff466c70b25c7fd1880225de6dd", size = 35704342, upload-time = "2025-05-21T13:32:22.996Z" }, + { url = "https://files.pythonhosted.org/packages/36/ca/f545f61282f75eea4dfde4db2944963dcd59abd50c20e33a1c894da44dad/polars-1.30.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:e3d05914c364b8e39a5b10dcf97e84d76e516b3b1693880bf189a93aab3ca00d", size = 32459857, upload-time = "2025-05-21T13:32:27.728Z" }, + { url = "https://files.pythonhosted.org/packages/76/20/e018cd87d7cb6f8684355f31f4e193222455a6e8f7b942f4a2934f5969c7/polars-1.30.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a52af3862082b868c1febeae650af8ae8a2105d2cb28f0449179a7b44f54ccf", size = 36267243, upload-time = "2025-05-21T13:32:31.796Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e7/b88b973021be07b13d91b9301cc14392c994225ef5107a32a8ffd3fd6424/polars-1.30.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ffb3ef133454275d4254442257c5f71dd6e393ce365c97997dadeb6fa9d6d4b5", size = 33416871, upload-time = "2025-05-21T13:32:35.077Z" }, + { url = "https://files.pythonhosted.org/packages/dd/7c/d46d4381adeac537b8520b653dc30cb8b7edbf59883d71fbb989e9005de1/polars-1.30.0-cp39-abi3-win_amd64.whl", hash = "sha256:c26b633a9bd530c5fc09d317fca3bb3e16c772bd7df7549a9d8ec1934773cc5d", size = 36363630, upload-time = "2025-05-21T13:32:38.286Z" }, + { url = "https://files.pythonhosted.org/packages/fb/b5/5056d0c12aadb57390d0627492bef8b1abf3549474abb9ae0fd4e2bfa885/polars-1.30.0-cp39-abi3-win_arm64.whl", hash = "sha256:476f1bde65bc7b4d9f80af370645c2981b5798d67c151055e58534e89e96f2a8", size = 32643590, upload-time = "2025-05-21T13:32:42.107Z" }, +] + [[package]] name = "prompt-toolkit" version = "3.0.51" @@ -1188,6 +1482,59 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, ] +[[package]] +name = "pyarrow" +version = "20.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload-time = "2025-04-27T12:34:23.264Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/23/77094eb8ee0dbe88441689cb6afc40ac312a1e15d3a7acc0586999518222/pyarrow-20.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:c7dd06fd7d7b410ca5dc839cc9d485d2bc4ae5240851bcd45d85105cc90a47d7", size = 30832591, upload-time = "2025-04-27T12:27:27.89Z" }, + { url = "https://files.pythonhosted.org/packages/c3/d5/48cc573aff00d62913701d9fac478518f693b30c25f2c157550b0b2565cb/pyarrow-20.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:d5382de8dc34c943249b01c19110783d0d64b207167c728461add1ecc2db88e4", size = 32273686, upload-time = "2025-04-27T12:27:36.816Z" }, + { url = "https://files.pythonhosted.org/packages/37/df/4099b69a432b5cb412dd18adc2629975544d656df3d7fda6d73c5dba935d/pyarrow-20.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6415a0d0174487456ddc9beaead703d0ded5966129fa4fd3114d76b5d1c5ceae", size = 41337051, upload-time = "2025-04-27T12:27:44.4Z" }, + { url = "https://files.pythonhosted.org/packages/4c/27/99922a9ac1c9226f346e3a1e15e63dee6f623ed757ff2893f9d6994a69d3/pyarrow-20.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15aa1b3b2587e74328a730457068dc6c89e6dcbf438d4369f572af9d320a25ee", size = 42404659, upload-time = "2025-04-27T12:27:51.715Z" }, + { url = "https://files.pythonhosted.org/packages/21/d1/71d91b2791b829c9e98f1e0d85be66ed93aff399f80abb99678511847eaa/pyarrow-20.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5605919fbe67a7948c1f03b9f3727d82846c053cd2ce9303ace791855923fd20", size = 40695446, upload-time = "2025-04-27T12:27:59.643Z" }, + { url = "https://files.pythonhosted.org/packages/f1/ca/ae10fba419a6e94329707487835ec721f5a95f3ac9168500bcf7aa3813c7/pyarrow-20.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a5704f29a74b81673d266e5ec1fe376f060627c2e42c5c7651288ed4b0db29e9", size = 42278528, upload-time = "2025-04-27T12:28:07.297Z" }, + { url = "https://files.pythonhosted.org/packages/7a/a6/aba40a2bf01b5d00cf9cd16d427a5da1fad0fb69b514ce8c8292ab80e968/pyarrow-20.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:00138f79ee1b5aca81e2bdedb91e3739b987245e11fa3c826f9e57c5d102fb75", size = 42918162, upload-time = "2025-04-27T12:28:15.716Z" }, + { url = "https://files.pythonhosted.org/packages/93/6b/98b39650cd64f32bf2ec6d627a9bd24fcb3e4e6ea1873c5e1ea8a83b1a18/pyarrow-20.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f2d67ac28f57a362f1a2c1e6fa98bfe2f03230f7e15927aecd067433b1e70ce8", size = 44550319, upload-time = "2025-04-27T12:28:27.026Z" }, + { url = "https://files.pythonhosted.org/packages/ab/32/340238be1eb5037e7b5de7e640ee22334417239bc347eadefaf8c373936d/pyarrow-20.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:4a8b029a07956b8d7bd742ffca25374dd3f634b35e46cc7a7c3fa4c75b297191", size = 25770759, upload-time = "2025-04-27T12:28:33.702Z" }, 
+ { url = "https://files.pythonhosted.org/packages/47/a2/b7930824181ceadd0c63c1042d01fa4ef63eee233934826a7a2a9af6e463/pyarrow-20.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:24ca380585444cb2a31324c546a9a56abbe87e26069189e14bdba19c86c049f0", size = 30856035, upload-time = "2025-04-27T12:28:40.78Z" }, + { url = "https://files.pythonhosted.org/packages/9b/18/c765770227d7f5bdfa8a69f64b49194352325c66a5c3bb5e332dfd5867d9/pyarrow-20.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:95b330059ddfdc591a3225f2d272123be26c8fa76e8c9ee1a77aad507361cfdb", size = 32309552, upload-time = "2025-04-27T12:28:47.051Z" }, + { url = "https://files.pythonhosted.org/packages/44/fb/dfb2dfdd3e488bb14f822d7335653092dde150cffc2da97de6e7500681f9/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f0fb1041267e9968c6d0d2ce3ff92e3928b243e2b6d11eeb84d9ac547308232", size = 41334704, upload-time = "2025-04-27T12:28:55.064Z" }, + { url = "https://files.pythonhosted.org/packages/58/0d/08a95878d38808051a953e887332d4a76bc06c6ee04351918ee1155407eb/pyarrow-20.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8ff87cc837601532cc8242d2f7e09b4e02404de1b797aee747dd4ba4bd6313f", size = 42399836, upload-time = "2025-04-27T12:29:02.13Z" }, + { url = "https://files.pythonhosted.org/packages/f3/cd/efa271234dfe38f0271561086eedcad7bc0f2ddd1efba423916ff0883684/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:7a3a5dcf54286e6141d5114522cf31dd67a9e7c9133d150799f30ee302a7a1ab", size = 40711789, upload-time = "2025-04-27T12:29:09.951Z" }, + { url = "https://files.pythonhosted.org/packages/46/1f/7f02009bc7fc8955c391defee5348f510e589a020e4b40ca05edcb847854/pyarrow-20.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a6ad3e7758ecf559900261a4df985662df54fb7fdb55e8e3b3aa99b23d526b62", size = 42301124, upload-time = "2025-04-27T12:29:17.187Z" }, + { url = "https://files.pythonhosted.org/packages/4f/92/692c562be4504c262089e86757a9048739fe1acb4024f92d39615e7bab3f/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6bb830757103a6cb300a04610e08d9636f0cd223d32f388418ea893a3e655f1c", size = 42916060, upload-time = "2025-04-27T12:29:24.253Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ec/9f5c7e7c828d8e0a3c7ef50ee62eca38a7de2fa6eb1b8fa43685c9414fef/pyarrow-20.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:96e37f0766ecb4514a899d9a3554fadda770fb57ddf42b63d80f14bc20aa7db3", size = 44547640, upload-time = "2025-04-27T12:29:32.782Z" }, + { url = "https://files.pythonhosted.org/packages/54/96/46613131b4727f10fd2ffa6d0d6f02efcc09a0e7374eff3b5771548aa95b/pyarrow-20.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:3346babb516f4b6fd790da99b98bed9708e3f02e734c84971faccb20736848dc", size = 25781491, upload-time = "2025-04-27T12:29:38.464Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload-time = "2025-04-27T12:29:44.384Z" }, + { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload-time = "2025-04-27T12:29:52.038Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload-time = "2025-04-27T12:29:59.452Z" }, + { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload-time = "2025-04-27T12:30:06.875Z" }, + { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload-time = "2025-04-27T12:30:13.954Z" }, + { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload-time = "2025-04-27T12:30:21.949Z" }, + { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload-time = "2025-04-27T12:30:29.551Z" }, + { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload-time = "2025-04-27T12:30:36.977Z" }, + { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload-time = "2025-04-27T12:30:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload-time = "2025-04-27T12:30:48.351Z" }, + { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload-time = "2025-04-27T12:30:55.238Z" }, + { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload-time = "2025-04-27T12:31:05.587Z" }, + { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload-time = "2025-04-27T12:31:15.675Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload-time = "2025-04-27T12:31:24.631Z" }, + { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload-time = "2025-04-27T12:31:31.311Z" }, + { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload-time = "2025-04-27T12:31:39.406Z" }, + { url = "https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload-time = "2025-04-27T12:31:45.997Z" }, + { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload-time = "2025-04-27T12:31:54.11Z" }, + { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload-time = "2025-04-27T12:31:59.215Z" }, + { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload-time = "2025-04-27T12:32:05.369Z" }, + { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload-time = "2025-04-27T12:32:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload-time = "2025-04-27T12:32:20.766Z" }, + { url = "https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload-time = "2025-04-27T12:32:28.1Z" }, + { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload-time = "2025-04-27T12:32:35.792Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload-time = "2025-04-27T12:32:46.64Z" }, + { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload-time = "2025-04-27T12:32:56.503Z" }, + { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload-time = "2025-04-27T12:33:04.72Z" }, +] + [[package]] name = "pycparser" version = "2.22" @@ -1197,6 +1544,108 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, ] +[[package]] +name = "pydantic" +version = "2.11.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/86/8ce9040065e8f924d642c58e4a344e33163a07f6b57f836d0d734e0ad3fb/pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a", size = 787102, upload-time = "2025-05-22T21:18:08.761Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/69/831ed22b38ff9b4b64b66569f0e5b7b97cf3638346eb95a2147fdb49ad5f/pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7", size = 444229, upload-time = "2025-05-22T21:18:06.329Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.33.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ad/88/5f2260bdfae97aabf98f1778d43f69574390ad787afb646292a638c923d4/pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc", size = 435195, upload-time = "2025-04-23T18:33:52.104Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/92/b31726561b5dae176c2d2c2dc43a9c5bfba5d32f96f8b4c0a600dd492447/pydantic_core-2.33.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2b3d326aaef0c0399d9afffeb6367d5e26ddc24d351dbc9c636840ac355dc5d8", size = 2028817, upload-time = "2025-04-23T18:30:43.919Z" }, + { url = "https://files.pythonhosted.org/packages/a3/44/3f0b95fafdaca04a483c4e685fe437c6891001bf3ce8b2fded82b9ea3aa1/pydantic_core-2.33.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e5b2671f05ba48b94cb90ce55d8bdcaaedb8ba00cc5359f6810fc918713983d", size = 1861357, upload-time = "2025-04-23T18:30:46.372Z" }, + { url = "https://files.pythonhosted.org/packages/30/97/e8f13b55766234caae05372826e8e4b3b96e7b248be3157f53237682e43c/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0069c9acc3f3981b9ff4cdfaf088e98d83440a4c7ea1bc07460af3d4dc22e72d", 
size = 1898011, upload-time = "2025-04-23T18:30:47.591Z" }, + { url = "https://files.pythonhosted.org/packages/9b/a3/99c48cf7bafc991cc3ee66fd544c0aae8dc907b752f1dad2d79b1b5a471f/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d53b22f2032c42eaaf025f7c40c2e3b94568ae077a606f006d206a463bc69572", size = 1982730, upload-time = "2025-04-23T18:30:49.328Z" }, + { url = "https://files.pythonhosted.org/packages/de/8e/a5b882ec4307010a840fb8b58bd9bf65d1840c92eae7534c7441709bf54b/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0405262705a123b7ce9f0b92f123334d67b70fd1f20a9372b907ce1080c7ba02", size = 2136178, upload-time = "2025-04-23T18:30:50.907Z" }, + { url = "https://files.pythonhosted.org/packages/e4/bb/71e35fc3ed05af6834e890edb75968e2802fe98778971ab5cba20a162315/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b25d91e288e2c4e0662b8038a28c6a07eaac3e196cfc4ff69de4ea3db992a1b", size = 2736462, upload-time = "2025-04-23T18:30:52.083Z" }, + { url = "https://files.pythonhosted.org/packages/31/0d/c8f7593e6bc7066289bbc366f2235701dcbebcd1ff0ef8e64f6f239fb47d/pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bdfe4b3789761f3bcb4b1ddf33355a71079858958e3a552f16d5af19768fef2", size = 2005652, upload-time = "2025-04-23T18:30:53.389Z" }, + { url = "https://files.pythonhosted.org/packages/d2/7a/996d8bd75f3eda405e3dd219ff5ff0a283cd8e34add39d8ef9157e722867/pydantic_core-2.33.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efec8db3266b76ef9607c2c4c419bdb06bf335ae433b80816089ea7585816f6a", size = 2113306, upload-time = "2025-04-23T18:30:54.661Z" }, + { url = "https://files.pythonhosted.org/packages/ff/84/daf2a6fb2db40ffda6578a7e8c5a6e9c8affb251a05c233ae37098118788/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:031c57d67ca86902726e0fae2214ce6770bbe2f710dc33063187a68744a5ecac", size = 2073720, upload-time = "2025-04-23T18:30:56.11Z" }, + { url = "https://files.pythonhosted.org/packages/77/fb/2258da019f4825128445ae79456a5499c032b55849dbd5bed78c95ccf163/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:f8de619080e944347f5f20de29a975c2d815d9ddd8be9b9b7268e2e3ef68605a", size = 2244915, upload-time = "2025-04-23T18:30:57.501Z" }, + { url = "https://files.pythonhosted.org/packages/d8/7a/925ff73756031289468326e355b6fa8316960d0d65f8b5d6b3a3e7866de7/pydantic_core-2.33.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:73662edf539e72a9440129f231ed3757faab89630d291b784ca99237fb94db2b", size = 2241884, upload-time = "2025-04-23T18:30:58.867Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b0/249ee6d2646f1cdadcb813805fe76265745c4010cf20a8eba7b0e639d9b2/pydantic_core-2.33.2-cp310-cp310-win32.whl", hash = "sha256:0a39979dcbb70998b0e505fb1556a1d550a0781463ce84ebf915ba293ccb7e22", size = 1910496, upload-time = "2025-04-23T18:31:00.078Z" }, + { url = "https://files.pythonhosted.org/packages/66/ff/172ba8f12a42d4b552917aa65d1f2328990d3ccfc01d5b7c943ec084299f/pydantic_core-2.33.2-cp310-cp310-win_amd64.whl", hash = "sha256:b0379a2b24882fef529ec3b4987cb5d003b9cda32256024e6fe1586ac45fc640", size = 1955019, upload-time = "2025-04-23T18:31:01.335Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8d/71db63483d518cbbf290261a1fc2839d17ff89fce7089e08cad07ccfce67/pydantic_core-2.33.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = 
"sha256:4c5b0a576fb381edd6d27f0a85915c6daf2f8138dc5c267a57c08a62900758c7", size = 2028584, upload-time = "2025-04-23T18:31:03.106Z" }, + { url = "https://files.pythonhosted.org/packages/24/2f/3cfa7244ae292dd850989f328722d2aef313f74ffc471184dc509e1e4e5a/pydantic_core-2.33.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e799c050df38a639db758c617ec771fd8fb7a5f8eaaa4b27b101f266b216a246", size = 1855071, upload-time = "2025-04-23T18:31:04.621Z" }, + { url = "https://files.pythonhosted.org/packages/b3/d3/4ae42d33f5e3f50dd467761304be2fa0a9417fbf09735bc2cce003480f2a/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc46a01bf8d62f227d5ecee74178ffc448ff4e5197c756331f71efcc66dc980f", size = 1897823, upload-time = "2025-04-23T18:31:06.377Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f3/aa5976e8352b7695ff808599794b1fba2a9ae2ee954a3426855935799488/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a144d4f717285c6d9234a66778059f33a89096dfb9b39117663fd8413d582dcc", size = 1983792, upload-time = "2025-04-23T18:31:07.93Z" }, + { url = "https://files.pythonhosted.org/packages/d5/7a/cda9b5a23c552037717f2b2a5257e9b2bfe45e687386df9591eff7b46d28/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cf6373c21bc80b2e0dc88444f41ae60b2f070ed02095754eb5a01df12256de", size = 2136338, upload-time = "2025-04-23T18:31:09.283Z" }, + { url = "https://files.pythonhosted.org/packages/2b/9f/b8f9ec8dd1417eb9da784e91e1667d58a2a4a7b7b34cf4af765ef663a7e5/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dc625f4aa79713512d1976fe9f0bc99f706a9dee21dfd1810b4bbbf228d0e8a", size = 2730998, upload-time = "2025-04-23T18:31:11.7Z" }, + { url = "https://files.pythonhosted.org/packages/47/bc/cd720e078576bdb8255d5032c5d63ee5c0bf4b7173dd955185a1d658c456/pydantic_core-2.33.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:881b21b5549499972441da4758d662aeea93f1923f953e9cbaff14b8b9565aef", size = 2003200, upload-time = "2025-04-23T18:31:13.536Z" }, + { url = "https://files.pythonhosted.org/packages/ca/22/3602b895ee2cd29d11a2b349372446ae9727c32e78a94b3d588a40fdf187/pydantic_core-2.33.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bdc25f3681f7b78572699569514036afe3c243bc3059d3942624e936ec93450e", size = 2113890, upload-time = "2025-04-23T18:31:15.011Z" }, + { url = "https://files.pythonhosted.org/packages/ff/e6/e3c5908c03cf00d629eb38393a98fccc38ee0ce8ecce32f69fc7d7b558a7/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:fe5b32187cbc0c862ee201ad66c30cf218e5ed468ec8dc1cf49dec66e160cc4d", size = 2073359, upload-time = "2025-04-23T18:31:16.393Z" }, + { url = "https://files.pythonhosted.org/packages/12/e7/6a36a07c59ebefc8777d1ffdaf5ae71b06b21952582e4b07eba88a421c79/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:bc7aee6f634a6f4a95676fcb5d6559a2c2a390330098dba5e5a5f28a2e4ada30", size = 2245883, upload-time = "2025-04-23T18:31:17.892Z" }, + { url = "https://files.pythonhosted.org/packages/16/3f/59b3187aaa6cc0c1e6616e8045b284de2b6a87b027cce2ffcea073adf1d2/pydantic_core-2.33.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:235f45e5dbcccf6bd99f9f472858849f73d11120d76ea8707115415f8e5ebebf", size = 2241074, upload-time = "2025-04-23T18:31:19.205Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/ed/55532bb88f674d5d8f67ab121a2a13c385df382de2a1677f30ad385f7438/pydantic_core-2.33.2-cp311-cp311-win32.whl", hash = "sha256:6368900c2d3ef09b69cb0b913f9f8263b03786e5b2a387706c5afb66800efd51", size = 1910538, upload-time = "2025-04-23T18:31:20.541Z" }, + { url = "https://files.pythonhosted.org/packages/fe/1b/25b7cccd4519c0b23c2dd636ad39d381abf113085ce4f7bec2b0dc755eb1/pydantic_core-2.33.2-cp311-cp311-win_amd64.whl", hash = "sha256:1e063337ef9e9820c77acc768546325ebe04ee38b08703244c1309cccc4f1bab", size = 1952909, upload-time = "2025-04-23T18:31:22.371Z" }, + { url = "https://files.pythonhosted.org/packages/49/a9/d809358e49126438055884c4366a1f6227f0f84f635a9014e2deb9b9de54/pydantic_core-2.33.2-cp311-cp311-win_arm64.whl", hash = "sha256:6b99022f1d19bc32a4c2a0d544fc9a76e3be90f0b3f4af413f87d38749300e65", size = 1897786, upload-time = "2025-04-23T18:31:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/18/8a/2b41c97f554ec8c71f2a8a5f85cb56a8b0956addfe8b0efb5b3d77e8bdc3/pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc", size = 2009000, upload-time = "2025-04-23T18:31:25.863Z" }, + { url = "https://files.pythonhosted.org/packages/a1/02/6224312aacb3c8ecbaa959897af57181fb6cf3a3d7917fd44d0f2917e6f2/pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7", size = 1847996, upload-time = "2025-04-23T18:31:27.341Z" }, + { url = "https://files.pythonhosted.org/packages/d6/46/6dcdf084a523dbe0a0be59d054734b86a981726f221f4562aed313dbcb49/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025", size = 1880957, upload-time = "2025-04-23T18:31:28.956Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6b/1ec2c03837ac00886ba8160ce041ce4e325b41d06a034adbef11339ae422/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011", size = 1964199, upload-time = "2025-04-23T18:31:31.025Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1d/6bf34d6adb9debd9136bd197ca72642203ce9aaaa85cfcbfcf20f9696e83/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f", size = 2120296, upload-time = "2025-04-23T18:31:32.514Z" }, + { url = "https://files.pythonhosted.org/packages/e0/94/2bd0aaf5a591e974b32a9f7123f16637776c304471a0ab33cf263cf5591a/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88", size = 2676109, upload-time = "2025-04-23T18:31:33.958Z" }, + { url = "https://files.pythonhosted.org/packages/f9/41/4b043778cf9c4285d59742281a769eac371b9e47e35f98ad321349cc5d61/pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1", size = 2002028, upload-time = "2025-04-23T18:31:39.095Z" }, + { url = "https://files.pythonhosted.org/packages/cb/d5/7bb781bf2748ce3d03af04d5c969fa1308880e1dca35a9bd94e1a96a922e/pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b", size = 2100044, 
upload-time = "2025-04-23T18:31:41.034Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/def5e53e1eb0ad896785702a5bbfd25eed546cdcf4087ad285021a90ed53/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1", size = 2058881, upload-time = "2025-04-23T18:31:42.757Z" }, + { url = "https://files.pythonhosted.org/packages/01/6c/57f8d70b2ee57fc3dc8b9610315949837fa8c11d86927b9bb044f8705419/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6", size = 2227034, upload-time = "2025-04-23T18:31:44.304Z" }, + { url = "https://files.pythonhosted.org/packages/27/b9/9c17f0396a82b3d5cbea4c24d742083422639e7bb1d5bf600e12cb176a13/pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea", size = 2234187, upload-time = "2025-04-23T18:31:45.891Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/adf5734ffd52bf86d865093ad70b2ce543415e0e356f6cacabbc0d9ad910/pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290", size = 1892628, upload-time = "2025-04-23T18:31:47.819Z" }, + { url = "https://files.pythonhosted.org/packages/43/e4/5479fecb3606c1368d496a825d8411e126133c41224c1e7238be58b87d7e/pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2", size = 1955866, upload-time = "2025-04-23T18:31:49.635Z" }, + { url = "https://files.pythonhosted.org/packages/0d/24/8b11e8b3e2be9dd82df4b11408a67c61bb4dc4f8e11b5b0fc888b38118b5/pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab", size = 1888894, upload-time = "2025-04-23T18:31:51.609Z" }, + { url = "https://files.pythonhosted.org/packages/46/8c/99040727b41f56616573a28771b1bfa08a3d3fe74d3d513f01251f79f172/pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f", size = 2015688, upload-time = "2025-04-23T18:31:53.175Z" }, + { url = "https://files.pythonhosted.org/packages/3a/cc/5999d1eb705a6cefc31f0b4a90e9f7fc400539b1a1030529700cc1b51838/pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6", size = 1844808, upload-time = "2025-04-23T18:31:54.79Z" }, + { url = "https://files.pythonhosted.org/packages/6f/5e/a0a7b8885c98889a18b6e376f344da1ef323d270b44edf8174d6bce4d622/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef", size = 1885580, upload-time = "2025-04-23T18:31:57.393Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2a/953581f343c7d11a304581156618c3f592435523dd9d79865903272c256a/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a", size = 1973859, upload-time = "2025-04-23T18:31:59.065Z" }, + { url = "https://files.pythonhosted.org/packages/e6/55/f1a813904771c03a3f97f676c62cca0c0a4138654107c1b61f19c644868b/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916", size = 2120810, 
upload-time = "2025-04-23T18:32:00.78Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c3/053389835a996e18853ba107a63caae0b9deb4a276c6b472931ea9ae6e48/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a", size = 2676498, upload-time = "2025-04-23T18:32:02.418Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3c/f4abd740877a35abade05e437245b192f9d0ffb48bbbbd708df33d3cda37/pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d", size = 2000611, upload-time = "2025-04-23T18:32:04.152Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/63ef2fed1837d1121a894d0ce88439fe3e3b3e48c7543b2a4479eb99c2bd/pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56", size = 2107924, upload-time = "2025-04-23T18:32:06.129Z" }, + { url = "https://files.pythonhosted.org/packages/04/8f/2551964ef045669801675f1cfc3b0d74147f4901c3ffa42be2ddb1f0efc4/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5", size = 2063196, upload-time = "2025-04-23T18:32:08.178Z" }, + { url = "https://files.pythonhosted.org/packages/26/bd/d9602777e77fc6dbb0c7db9ad356e9a985825547dce5ad1d30ee04903918/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e", size = 2236389, upload-time = "2025-04-23T18:32:10.242Z" }, + { url = "https://files.pythonhosted.org/packages/42/db/0e950daa7e2230423ab342ae918a794964b053bec24ba8af013fc7c94846/pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162", size = 2239223, upload-time = "2025-04-23T18:32:12.382Z" }, + { url = "https://files.pythonhosted.org/packages/58/4d/4f937099c545a8a17eb52cb67fe0447fd9a373b348ccfa9a87f141eeb00f/pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849", size = 1900473, upload-time = "2025-04-23T18:32:14.034Z" }, + { url = "https://files.pythonhosted.org/packages/a0/75/4a0a9bac998d78d889def5e4ef2b065acba8cae8c93696906c3a91f310ca/pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9", size = 1955269, upload-time = "2025-04-23T18:32:15.783Z" }, + { url = "https://files.pythonhosted.org/packages/f9/86/1beda0576969592f1497b4ce8e7bc8cbdf614c352426271b1b10d5f0aa64/pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9", size = 1893921, upload-time = "2025-04-23T18:32:18.473Z" }, + { url = "https://files.pythonhosted.org/packages/a4/7d/e09391c2eebeab681df2b74bfe6c43422fffede8dc74187b2b0bf6fd7571/pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac", size = 1806162, upload-time = "2025-04-23T18:32:20.188Z" }, + { url = "https://files.pythonhosted.org/packages/f1/3d/847b6b1fed9f8ed3bb95a9ad04fbd0b212e832d4f0f50ff4d9ee5a9f15cf/pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5", size = 
1981560, upload-time = "2025-04-23T18:32:22.354Z" }, + { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, + { url = "https://files.pythonhosted.org/packages/30/68/373d55e58b7e83ce371691f6eaa7175e3a24b956c44628eb25d7da007917/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5c4aa4e82353f65e548c476b37e64189783aa5384903bfea4f41580f255fddfa", size = 2023982, upload-time = "2025-04-23T18:32:53.14Z" }, + { url = "https://files.pythonhosted.org/packages/a4/16/145f54ac08c96a63d8ed6442f9dec17b2773d19920b627b18d4f10a061ea/pydantic_core-2.33.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d946c8bf0d5c24bf4fe333af284c59a19358aa3ec18cb3dc4370080da1e8ad29", size = 1858412, upload-time = "2025-04-23T18:32:55.52Z" }, + { url = "https://files.pythonhosted.org/packages/41/b1/c6dc6c3e2de4516c0bb2c46f6a373b91b5660312342a0cf5826e38ad82fa/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87b31b6846e361ef83fedb187bb5b4372d0da3f7e28d85415efa92d6125d6e6d", size = 1892749, upload-time = "2025-04-23T18:32:57.546Z" }, + { url = "https://files.pythonhosted.org/packages/12/73/8cd57e20afba760b21b742106f9dbdfa6697f1570b189c7457a1af4cd8a0/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa9d91b338f2df0508606f7009fde642391425189bba6d8c653afd80fd6bb64e", size = 2067527, upload-time = "2025-04-23T18:32:59.771Z" }, + { url = "https://files.pythonhosted.org/packages/e3/d5/0bb5d988cc019b3cba4a78f2d4b3854427fc47ee8ec8e9eaabf787da239c/pydantic_core-2.33.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2058a32994f1fde4ca0480ab9d1e75a0e8c87c22b53a3ae66554f9af78f2fe8c", size = 2108225, upload-time = "2025-04-23T18:33:04.51Z" }, + { url = "https://files.pythonhosted.org/packages/f1/c5/00c02d1571913d496aabf146106ad8239dc132485ee22efe08085084ff7c/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0e03262ab796d986f978f79c943fc5f620381be7287148b8010b4097f79a39ec", size = 2069490, upload-time = "2025-04-23T18:33:06.391Z" }, + { url = "https://files.pythonhosted.org/packages/22/a8/dccc38768274d3ed3a59b5d06f59ccb845778687652daa71df0cab4040d7/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1a8695a8d00c73e50bff9dfda4d540b7dee29ff9b8053e38380426a85ef10052", size = 2237525, upload-time = "2025-04-23T18:33:08.44Z" }, + { url = "https://files.pythonhosted.org/packages/d4/e7/4f98c0b125dda7cf7ccd14ba936218397b44f50a56dd8c16a3091df116c3/pydantic_core-2.33.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fa754d1850735a0b0e03bcffd9d4b4343eb417e47196e4485d9cca326073a42c", size = 2238446, upload-time = "2025-04-23T18:33:10.313Z" }, + { url = "https://files.pythonhosted.org/packages/ce/91/2ec36480fdb0b783cd9ef6795753c1dea13882f2e68e73bce76ae8c21e6a/pydantic_core-2.33.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a11c8d26a50bfab49002947d3d237abe4d9e4b5bdc8846a63537b6488e197808", size = 2066678, upload-time = "2025-04-23T18:33:12.224Z" }, + { url = "https://files.pythonhosted.org/packages/7b/27/d4ae6487d73948d6f20dddcd94be4ea43e74349b56eba82e9bdee2d7494c/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:dd14041875d09cc0f9308e37a6f8b65f5585cf2598a53aa0123df8b129d481f8", size = 2025200, upload-time = "2025-04-23T18:33:14.199Z" }, + { url = "https://files.pythonhosted.org/packages/f1/b8/b3cb95375f05d33801024079b9392a5ab45267a63400bf1866e7ce0f0de4/pydantic_core-2.33.2-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:d87c561733f66531dced0da6e864f44ebf89a8fba55f31407b00c2f7f9449593", size = 1859123, upload-time = "2025-04-23T18:33:16.555Z" }, + { url = "https://files.pythonhosted.org/packages/05/bc/0d0b5adeda59a261cd30a1235a445bf55c7e46ae44aea28f7bd6ed46e091/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82865531efd18d6e07a04a17331af02cb7a651583c418df8266f17a63c6612", size = 1892852, upload-time = "2025-04-23T18:33:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/3e/11/d37bdebbda2e449cb3f519f6ce950927b56d62f0b84fd9cb9e372a26a3d5/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bfb5112df54209d820d7bf9317c7a6c9025ea52e49f46b6a2060104bba37de7", size = 2067484, upload-time = "2025-04-23T18:33:20.475Z" }, + { url = "https://files.pythonhosted.org/packages/8c/55/1f95f0a05ce72ecb02a8a8a1c3be0579bbc29b1d5ab68f1378b7bebc5057/pydantic_core-2.33.2-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:64632ff9d614e5eecfb495796ad51b0ed98c453e447a76bcbeeb69615079fc7e", size = 2108896, upload-time = "2025-04-23T18:33:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/53/89/2b2de6c81fa131f423246a9109d7b2a375e83968ad0800d6e57d0574629b/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f889f7a40498cc077332c7ab6b4608d296d852182211787d4f3ee377aaae66e8", size = 2069475, upload-time = "2025-04-23T18:33:24.528Z" }, + { url = "https://files.pythonhosted.org/packages/b8/e9/1f7efbe20d0b2b10f6718944b5d8ece9152390904f29a78e68d4e7961159/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:de4b83bb311557e439b9e186f733f6c645b9417c84e2eb8203f3f820a4b988bf", size = 2239013, upload-time = "2025-04-23T18:33:26.621Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/5309c905a93811524a49b4e031e9851a6b00ff0fb668794472ea7746b448/pydantic_core-2.33.2-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:82f68293f055f51b51ea42fafc74b6aad03e70e191799430b90c13d643059ebb", size = 2238715, upload-time = "2025-04-23T18:33:28.656Z" }, + { url = "https://files.pythonhosted.org/packages/32/56/8a7ca5d2cd2cda1d245d34b1c9a942920a718082ae8e54e5f3e5a58b7add/pydantic_core-2.33.2-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:329467cecfb529c925cf2bbd4d60d2c509bc2fb52a20c1045bf09bb70971a9c1", size = 2066757, upload-time = "2025-04-23T18:33:30.645Z" }, +] + [[package]] name = "pygments" version = "2.19.1" @@ -1206,6 +1655,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, ] +[[package]] +name = "pyiceberg" +version = "0.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "click" }, + { name = "fsspec" }, + { name = "mmh3" }, + { name = "pydantic" }, + { name = "pyparsing" }, + { name = "requests" }, + { name = "rich" }, + { name = "sortedcontainers" }, + { name = "strictyaml" }, + 
{ name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bd/6a/6c1ac381ff0b8e03a9abc2f05722f6002d7452a2c05118697b3f3910e171/pyiceberg-0.9.1.tar.gz", hash = "sha256:3634134ce33859a441768b39df179b2c6f3de2bbbf506622884f553b013ee799", size = 617629, upload-time = "2025-04-30T14:59:34.306Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/5d/bb10c86b85895d4ba471b8a0e187031d4aaa82592a639242b83dd9354861/pyiceberg-0.9.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a183d9217eb82159c01b23c683057f96c8b2375f592b921721d1c157895e2df", size = 527097, upload-time = "2025-04-30T14:58:52.39Z" }, + { url = "https://files.pythonhosted.org/packages/ec/b9/1d6f0d334bc51cd64a58b7320d521e54af3810a6bd748fe2e89db1ad8d5f/pyiceberg-0.9.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:57030bb15c397b0379242907c5611f5b4338fb799e972353fd0edafde6cfd2ef", size = 523267, upload-time = "2025-04-30T14:58:53.978Z" }, + { url = "https://files.pythonhosted.org/packages/02/f5/bd43a9c1d2cd3aeb987cbf2b7f25e2b10306fa81522ea00df250fb23cc84/pyiceberg-0.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ba4cd9a8f6a04cfbc68e0c83f2db3ffd14244da8601a142cc05965d4b343645", size = 838616, upload-time = "2025-04-30T14:58:55.252Z" }, + { url = "https://files.pythonhosted.org/packages/d0/01/c68f9e03413dc983ddadc2c471038af2ff792449fc451731f58a958a7696/pyiceberg-0.9.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d5a48c6a2016d0dcde8c9079cc5e6b6d2e2ac663eddfe4697e7ea03a0edc40b7", size = 838290, upload-time = "2025-04-30T14:58:56.412Z" }, + { url = "https://files.pythonhosted.org/packages/ab/80/b7cba54a33b8b7be3655ff656d6bb8594fec0316eec5cafa231ec7f6ff74/pyiceberg-0.9.1-cp310-cp310-win_amd64.whl", hash = "sha256:8bebfa5a804a95a9f3d98d88cbeb37430b09add04592238bba2a2b2e0466d60d", size = 523612, upload-time = "2025-04-30T14:58:59.507Z" }, + { url = "https://files.pythonhosted.org/packages/f6/75/c8b4ebba7d345b5e736ebf4976121b97dd7091dcad401a17ca57152704c5/pyiceberg-0.9.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0e75c502dd56ac3d77036ce8a3b2566348da5ff4367c7c671981616ef6dcc883", size = 566274, upload-time = "2025-04-30T14:59:00.626Z" }, + { url = "https://files.pythonhosted.org/packages/e0/a0/9494c7930e5e4dc951d95abba584d8ffdb7403368398796ede21ff25c26f/pyiceberg-0.9.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0a8189c9b3ba81dd12493d6bb874a656a4d4909904552b97a629d1d43b3a0e90", size = 560157, upload-time = "2025-04-30T14:59:02.082Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d4/351776b1ae83de187d7cf37b100f4f124c7a71e35337182d3aef308156d1/pyiceberg-0.9.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c03065d5c5b704444ab8fb18cdd232ec43994db95b9e53444008ebc2cf9dc2c", size = 1052290, upload-time = "2025-04-30T14:59:03.232Z" }, + { url = "https://files.pythonhosted.org/packages/40/17/d8fea681afb52f20bf6a640f9044dcf621a47165f67cc5320bf3c6e82e4e/pyiceberg-0.9.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:93f2586a5da737de6e4643bf096a01772f068d1eedb7ffde6b36c60b6b9e6bd3", size = 1047503, upload-time = "2025-04-30T14:59:04.38Z" }, + { url = "https://files.pythonhosted.org/packages/d0/e0/d173fc2aa8dc252d7aac71703ba2c0491e4988b3a160cf5abb531cfb9086/pyiceberg-0.9.1-cp311-cp311-win_amd64.whl", hash = "sha256:94e45c10051110ba7a43b85a1f0a680b4a31d1d6cee593c8e62e14d22d18c47d", size = 559491, upload-time = "2025-04-30T14:59:05.615Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/26/77983c2884b4a5f13f8a35e5c5e762ae699f6c511efd16730ab883000c1b/pyiceberg-0.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b8a958e3bbe919026533cee1f0fb6b7040928fce8d42c2ecea228de7c17578fa", size = 605755, upload-time = "2025-04-30T14:59:07.087Z" }, + { url = "https://files.pythonhosted.org/packages/6d/67/e6ea7fcc43aebc85aea5a67a69d01c9015283478061c3121b6b8aa158ce4/pyiceberg-0.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7e956b35c6822600c45fd8f3ea8cfea328cc406fefa534afeb6fdb325d05406", size = 597325, upload-time = "2025-04-30T14:59:08.644Z" }, + { url = "https://files.pythonhosted.org/packages/7f/cf/178a9f63fac1bfdd13bc85169e7ab903955d082e2cd80507b1921a6f64dc/pyiceberg-0.9.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e4e585164d7d86f5c9a609a1bc2abeae2f0ea0680a11a2064d3a945866b5311", size = 1277399, upload-time = "2025-04-30T14:59:10.193Z" }, + { url = "https://files.pythonhosted.org/packages/d1/6b/78d1739eb1d5b18529ee438aed75dac3e0b246f5e4d800931f9d1e37cda2/pyiceberg-0.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5fee08dac30e8524526f7d18468f9670f8606905b850b261314c597c6633f3b4", size = 1269083, upload-time = "2025-04-30T14:59:11.964Z" }, + { url = "https://files.pythonhosted.org/packages/67/69/c0087d19c8d8e8530acee3ba485d54aedeebf2963784a16692ca4b439566/pyiceberg-0.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:124793c54a0c2fb5ac4ab19c38da116c068e277c85cbaa7e4064e635a70b595e", size = 595512, upload-time = "2025-04-30T14:59:14.464Z" }, + { url = "https://files.pythonhosted.org/packages/aa/62/0153ed3a39d6f4b3235d430123703d4684eec7ba780404bbc118ace7406a/pyiceberg-0.9.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:77aec1c77d675603e0c5358e74adcae8d13b323753d702011be3f309d26af355", size = 668261, upload-time = "2025-04-30T14:59:21.751Z" }, + { url = "https://files.pythonhosted.org/packages/24/bd/c4cec142686dd8124032c69b6b02ba3703abc114ce787d0f02088b1f43d8/pyiceberg-0.9.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cf567438bf6267bbb67fdfdfc72ac500d523725fca9a6a38f93e8acd4146190e", size = 657439, upload-time = "2025-04-30T14:59:23.304Z" }, + { url = "https://files.pythonhosted.org/packages/ae/74/bbfc70bb1857f9d55d06fee1330a0236876b8ae4aa6fc5d815e2c4fef4f7/pyiceberg-0.9.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5992db7c00d789a33ff117700d453126803e769507a5edeb79bb6510ff72fc00", size = 1352983, upload-time = "2025-04-30T14:59:25.023Z" }, + { url = "https://files.pythonhosted.org/packages/90/20/e33e1716d1368b2471b80d9f1e338110f1e781b34ebffc5e320523102ffc/pyiceberg-0.9.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c9e460fca26162a3822c0e8d50b49c80928a0e35cb41698748d7a26f8c016215", size = 657563, upload-time = "2025-04-30T14:59:27.004Z" }, +] + [[package]] name = "pyparsing" version = "3.2.3" @@ -1266,6 +1755,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, ] +[[package]] +name = "pytz" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = 
"sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884, upload-time = "2025-03-25T02:25:00.538Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, +] + [[package]] name = "pywin32" version = "310" @@ -1285,6 +1783,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload-time = "2025-03-17T00:56:07.819Z" }, ] +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199, upload-time = "2024-08-06T20:31:40.178Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758, upload-time = "2024-08-06T20:31:42.173Z" }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463, upload-time = "2024-08-06T20:31:44.263Z" }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280, upload-time = "2024-08-06T20:31:50.199Z" }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239, upload-time = "2024-08-06T20:31:52.292Z" }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802, upload-time = "2024-08-06T20:31:53.836Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527, upload-time = "2024-08-06T20:31:55.565Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052, upload-time = "2024-08-06T20:31:56.914Z" }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774, upload-time = "2024-08-06T20:31:58.304Z" }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload-time = "2024-08-06T20:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload-time = "2024-08-06T20:32:04.926Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload-time = "2024-08-06T20:32:06.459Z" }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload-time = "2024-08-06T20:32:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload-time = "2024-08-06T20:32:14.124Z" }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload-time = "2024-08-06T20:32:16.17Z" }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload-time = "2024-08-06T20:32:18.555Z" }, + { url = "https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload-time = "2024-08-06T20:32:19.889Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, +] + [[package]] name = "pyzmq" version = "26.4.0" @@ -1404,16 +1946,16 @@ wheels = [ [[package]] name = "rich" -version = "14.0.0" +version = "13.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload-time = "2024-11-01T16:43:55.817Z" }, ] [[package]] @@ -1459,6 +2001,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "stack-data" version = "0.6.3" @@ -1473,6 +2024,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521, upload-time = "2023-09-30T13:58:03.53Z" }, ] +[[package]] +name = "strictyaml" +version = "1.7.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/08/efd28d49162ce89c2ad61a88bd80e11fb77bc9f6c145402589112d38f8af/strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407", size = 115206, upload-time = "2023-03-10T12:50:27.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/7c/a81ef5ef10978dd073a854e0fa93b5d8021d0594b639cc8f6453c3c78a1d/strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7", size = 123917, upload-time = "2023-03-10T12:50:17.242Z" }, +] + +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + [[package]] name = "tomli" version = "2.2.1" @@ -1531,6 +2103,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/55/a7/535c44c7bea4578e48281d83c615219f3ab19e6abc67625ef637c73987be/tornado-6.5.1-cp39-abi3-win_arm64.whl", hash = "sha256:02420a0eb7bf617257b9935e2b754d1b63897525d8a289c9d65690d580b4dcf7", size = 443596, upload-time = 
"2025-05-22T18:15:37.433Z" }, ] +[[package]] +name = "tqdm" +version = "4.67.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, +] + [[package]] name = "traitlets" version = "5.14.3" @@ -1549,6 +2133,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, ] +[[package]] +name = "typing-inspection" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f8/b1/0c11f5058406b3af7609f121aaa6b609744687f1d158b3c3a5bf4cc94238/typing_inspection-0.4.1.tar.gz", hash = "sha256:6ae134cc0203c33377d43188d4064e9b357dba58cff3185f22924610e70a9d28", size = 75726, upload-time = "2025-05-21T18:55:23.885Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/17/69/cd203477f944c353c31bade965f880aa1061fd6bf05ded0726ca845b6ff7/typing_inspection-0.4.1-py3-none-any.whl", hash = "sha256:389055682238f53b04f7badcb49b989835495a96700ced5dab2d8feae4b26f51", size = 14552, upload-time = "2025-05-21T18:55:22.152Z" }, +] + +[[package]] +name = "tzdata" +version = "2025.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380, upload-time = "2025-03-23T13:54:43.652Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839, upload-time = "2025-03-23T13:54:41.845Z" }, +] + [[package]] name = "urllib3" version = "2.4.0" @@ -1567,6 +2172,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, ] +[[package]] +name = "wrapt" +version = "1.17.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/5a/d1/1daec934997e8b160040c78d7b31789f19b122110a75eca3d4e8da0049e1/wrapt-1.17.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3d57c572081fed831ad2d26fd430d565b76aa277ed1d30ff4d40670b1c0dd984", size = 53307, upload-time = "2025-01-14T10:33:13.616Z" }, + { url = "https://files.pythonhosted.org/packages/1b/7b/13369d42651b809389c1a7153baa01d9700430576c81a2f5c5e460df0ed9/wrapt-1.17.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5e251054542ae57ac7f3fba5d10bfff615b6c2fb09abeb37d2f1463f841ae22", size = 38486, upload-time = "2025-01-14T10:33:15.947Z" }, + { url = "https://files.pythonhosted.org/packages/62/bf/e0105016f907c30b4bd9e377867c48c34dc9c6c0c104556c9c9126bd89ed/wrapt-1.17.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80dd7db6a7cb57ffbc279c4394246414ec99537ae81ffd702443335a61dbf3a7", size = 38777, upload-time = "2025-01-14T10:33:17.462Z" }, + { url = "https://files.pythonhosted.org/packages/27/70/0f6e0679845cbf8b165e027d43402a55494779295c4b08414097b258ac87/wrapt-1.17.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a6e821770cf99cc586d33833b2ff32faebdbe886bd6322395606cf55153246c", size = 83314, upload-time = "2025-01-14T10:33:21.282Z" }, + { url = "https://files.pythonhosted.org/packages/0f/77/0576d841bf84af8579124a93d216f55d6f74374e4445264cb378a6ed33eb/wrapt-1.17.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b60fb58b90c6d63779cb0c0c54eeb38941bae3ecf7a73c764c52c88c2dcb9d72", size = 74947, upload-time = "2025-01-14T10:33:24.414Z" }, + { url = "https://files.pythonhosted.org/packages/90/ec/00759565518f268ed707dcc40f7eeec38637d46b098a1f5143bff488fe97/wrapt-1.17.2-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b870b5df5b71d8c3359d21be8f0d6c485fa0ebdb6477dda51a1ea54a9b558061", size = 82778, upload-time = "2025-01-14T10:33:26.152Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5a/7cffd26b1c607b0b0c8a9ca9d75757ad7620c9c0a9b4a25d3f8a1480fafc/wrapt-1.17.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:4011d137b9955791f9084749cba9a367c68d50ab8d11d64c50ba1688c9b457f2", size = 81716, upload-time = "2025-01-14T10:33:27.372Z" }, + { url = "https://files.pythonhosted.org/packages/7e/09/dccf68fa98e862df7e6a60a61d43d644b7d095a5fc36dbb591bbd4a1c7b2/wrapt-1.17.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:1473400e5b2733e58b396a04eb7f35f541e1fb976d0c0724d0223dd607e0f74c", size = 74548, upload-time = "2025-01-14T10:33:28.52Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8e/067021fa3c8814952c5e228d916963c1115b983e21393289de15128e867e/wrapt-1.17.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3cedbfa9c940fdad3e6e941db7138e26ce8aad38ab5fe9dcfadfed9db7a54e62", size = 81334, upload-time = "2025-01-14T10:33:29.643Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0d/9d4b5219ae4393f718699ca1c05f5ebc0c40d076f7e65fd48f5f693294fb/wrapt-1.17.2-cp310-cp310-win32.whl", hash = "sha256:582530701bff1dec6779efa00c516496968edd851fba224fbd86e46cc6b73563", size = 36427, upload-time = "2025-01-14T10:33:30.832Z" }, + { url = "https://files.pythonhosted.org/packages/72/6a/c5a83e8f61aec1e1aeef939807602fb880e5872371e95df2137142f5c58e/wrapt-1.17.2-cp310-cp310-win_amd64.whl", hash = "sha256:58705da316756681ad3c9c73fd15499aa4d8c69f9fd38dc8a35e06c12468582f", size = 38774, upload-time = "2025-01-14T10:33:32.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/cd/f7/a2aab2cbc7a665efab072344a8949a71081eed1d2f451f7f7d2b966594a2/wrapt-1.17.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ff04ef6eec3eee8a5efef2401495967a916feaa353643defcc03fc74fe213b58", size = 53308, upload-time = "2025-01-14T10:33:33.992Z" }, + { url = "https://files.pythonhosted.org/packages/50/ff/149aba8365fdacef52b31a258c4dc1c57c79759c335eff0b3316a2664a64/wrapt-1.17.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db983e7bca53819efdbd64590ee96c9213894272c776966ca6306b73e4affda", size = 38488, upload-time = "2025-01-14T10:33:35.264Z" }, + { url = "https://files.pythonhosted.org/packages/65/46/5a917ce85b5c3b490d35c02bf71aedaa9f2f63f2d15d9949cc4ba56e8ba9/wrapt-1.17.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9abc77a4ce4c6f2a3168ff34b1da9b0f311a8f1cfd694ec96b0603dff1c79438", size = 38776, upload-time = "2025-01-14T10:33:38.28Z" }, + { url = "https://files.pythonhosted.org/packages/ca/74/336c918d2915a4943501c77566db41d1bd6e9f4dbc317f356b9a244dfe83/wrapt-1.17.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b929ac182f5ace000d459c59c2c9c33047e20e935f8e39371fa6e3b85d56f4a", size = 83776, upload-time = "2025-01-14T10:33:40.678Z" }, + { url = "https://files.pythonhosted.org/packages/09/99/c0c844a5ccde0fe5761d4305485297f91d67cf2a1a824c5f282e661ec7ff/wrapt-1.17.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f09b286faeff3c750a879d336fb6d8713206fc97af3adc14def0cdd349df6000", size = 75420, upload-time = "2025-01-14T10:33:41.868Z" }, + { url = "https://files.pythonhosted.org/packages/b4/b0/9fc566b0fe08b282c850063591a756057c3247b2362b9286429ec5bf1721/wrapt-1.17.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a7ed2d9d039bd41e889f6fb9364554052ca21ce823580f6a07c4ec245c1f5d6", size = 83199, upload-time = "2025-01-14T10:33:43.598Z" }, + { url = "https://files.pythonhosted.org/packages/9d/4b/71996e62d543b0a0bd95dda485219856def3347e3e9380cc0d6cf10cfb2f/wrapt-1.17.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:129a150f5c445165ff941fc02ee27df65940fcb8a22a61828b1853c98763a64b", size = 82307, upload-time = "2025-01-14T10:33:48.499Z" }, + { url = "https://files.pythonhosted.org/packages/39/35/0282c0d8789c0dc9bcc738911776c762a701f95cfe113fb8f0b40e45c2b9/wrapt-1.17.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1fb5699e4464afe5c7e65fa51d4f99e0b2eadcc176e4aa33600a3df7801d6662", size = 75025, upload-time = "2025-01-14T10:33:51.191Z" }, + { url = "https://files.pythonhosted.org/packages/4f/6d/90c9fd2c3c6fee181feecb620d95105370198b6b98a0770cba090441a828/wrapt-1.17.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9a2bce789a5ea90e51a02dfcc39e31b7f1e662bc3317979aa7e5538e3a034f72", size = 81879, upload-time = "2025-01-14T10:33:52.328Z" }, + { url = "https://files.pythonhosted.org/packages/8f/fa/9fb6e594f2ce03ef03eddbdb5f4f90acb1452221a5351116c7c4708ac865/wrapt-1.17.2-cp311-cp311-win32.whl", hash = "sha256:4afd5814270fdf6380616b321fd31435a462019d834f83c8611a0ce7484c7317", size = 36419, upload-time = "2025-01-14T10:33:53.551Z" }, + { url = "https://files.pythonhosted.org/packages/47/f8/fb1773491a253cbc123c5d5dc15c86041f746ed30416535f2a8df1f4a392/wrapt-1.17.2-cp311-cp311-win_amd64.whl", hash = "sha256:acc130bc0375999da18e3d19e5a86403667ac0c4042a094fefb7eec8ebac7cf3", size = 38773, upload-time = "2025-01-14T10:33:56.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, + { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, + { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, + { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721, upload-time = "2025-01-14T10:34:07.163Z" }, + { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899, upload-time = "2025-01-14T10:34:09.82Z" }, + { url = "https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222, upload-time = "2025-01-14T10:34:11.258Z" }, + { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707, upload-time = "2025-01-14T10:34:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685, upload-time = "2025-01-14T10:34:15.043Z" }, + { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567, upload-time = "2025-01-14T10:34:16.563Z" }, + { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672, upload-time = "2025-01-14T10:34:17.727Z" }, + { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865, upload-time = "2025-01-14T10:34:19.577Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800, upload-time = "2025-01-14T10:34:21.571Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824, upload-time = "2025-01-14T10:34:22.999Z" }, + { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920, upload-time = "2025-01-14T10:34:25.386Z" }, + { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690, upload-time = "2025-01-14T10:34:28.058Z" }, + { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861, upload-time = "2025-01-14T10:34:29.167Z" }, + { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174, upload-time = "2025-01-14T10:34:31.702Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721, upload-time = "2025-01-14T10:34:32.91Z" }, + { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763, upload-time = "2025-01-14T10:34:34.903Z" }, + { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585, upload-time = "2025-01-14T10:34:36.13Z" }, + { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676, upload-time = "2025-01-14T10:34:37.962Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871, upload-time = "2025-01-14T10:34:39.13Z" }, + { url = 
"https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312, upload-time = "2025-01-14T10:34:40.604Z" }, + { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062, upload-time = "2025-01-14T10:34:45.011Z" }, + { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155, upload-time = "2025-01-14T10:34:47.25Z" }, + { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471, upload-time = "2025-01-14T10:34:50.934Z" }, + { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208, upload-time = "2025-01-14T10:34:52.297Z" }, + { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339, upload-time = "2025-01-14T10:34:53.489Z" }, + { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232, upload-time = "2025-01-14T10:34:55.327Z" }, + { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476, upload-time = "2025-01-14T10:34:58.055Z" }, + { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377, upload-time = "2025-01-14T10:34:59.3Z" }, + { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986, upload-time = "2025-01-14T10:35:00.498Z" }, + { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750, upload-time = "2025-01-14T10:35:03.378Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, +] + [[package]] name = "xxhash" version = "3.5.0"