googleapis · chandra-siri · Dec 3, 2025 · Dec 23, 2025 · Dec 27, 2025 · Dec 27, 2025
@@ -62,3 +62,6 @@ system_tests/local_test_setup
 # Make sure a generated file isn't accidentally committed.
 pylintrc
 pylintrc.test
+
+# Benchmarking results and logs
+__benchmark_results__/**
@@ -0,0 +1,13 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
@@ -0,0 +1,163 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import io
+import os
+import statistics
+from typing import Any, List, Optional
+
+
+def publish_benchmark_extra_info(
+    benchmark: Any,
+    params: Any,
+    benchmark_group: str = "read",
+    true_times: Optional[List[float]] = None,
+) -> None:
+    """
+    Publish workload parameters and throughput figures for a benchmark.
+
+    Copies the workload parameters into ``benchmark.extra_info``, sets
+    ``benchmark.group``, derives throughput values (MiB/s) from the timing
+    entries of ``benchmark.stats`` and prints a summary to stdout.
+
+    Args:
+        benchmark: Object exposing ``extra_info`` (a mutable mapping),
+            ``stats`` (subscriptable by "min", "max", "mean", "median",
+            "rounds" and "iterations"), ``name`` and a settable ``group``.
+        params: Workload description. The attributes read here are
+            ``num_files``, ``file_size_bytes``, ``chunk_size_bytes``,
+            ``pattern`` (not read for the "write" group), ``num_coros``,
+            ``rounds``, ``bucket_name``, ``bucket_type`` and
+            ``num_processes``.
+        benchmark_group: Group name assigned to the benchmark. For "write"
+            the access pattern is always recorded as "seq".
+        true_times: Optional per-round durations (presumably seconds, to
+            match the MiB/s keys -- confirm with callers). When non-empty,
+            throughput is computed for every entry and published under the
+            ``true_throughput_MiB_s_*`` keys. ``None`` or an empty list
+            skips this step.
+    """
+    # Workload parameters, recorded verbatim next to the timing results.
+    benchmark.extra_info["num_files"] = params.num_files
+    benchmark.extra_info["file_size"] = params.file_size_bytes
+    benchmark.extra_info["chunk_size"] = params.chunk_size_bytes
+    benchmark.extra_info["pattern"] = (
+        "seq" if benchmark_group == "write" else params.pattern
+    )
+    benchmark.extra_info["coros"] = params.num_coros
+    benchmark.extra_info["rounds"] = params.rounds
+    benchmark.extra_info["bucket_name"] = params.bucket_name
+    benchmark.extra_info["bucket_type"] = params.bucket_type
+    benchmark.extra_info["processes"] = params.num_processes
+    benchmark.group = benchmark_group
+
+    # Total payload moved per round, in MiB.
+    total_mib = params.file_size_bytes / (1024 * 1024) * params.num_files
+
+    # Throughput is inversely proportional to time, so the slowest round
+    # (max time) gives the minimum throughput and vice versa. Dividing by the
+    # mean/median time only approximates the mean/median throughput.
+    min_throughput = total_mib / benchmark.stats["max"]
+    max_throughput = total_mib / benchmark.stats["min"]
+    mean_throughput = total_mib / benchmark.stats["mean"]
+    median_throughput = total_mib / benchmark.stats["median"]
+
+    benchmark.extra_info["throughput_MiB_s_min"] = min_throughput
+    benchmark.extra_info["throughput_MiB_s_max"] = max_throughput
+    benchmark.extra_info["throughput_MiB_s_mean"] = mean_throughput
+    benchmark.extra_info["throughput_MiB_s_median"] = median_throughput
+
+    print("\nThroughput Statistics (MiB/s):")
+    print(f"  Min:    {min_throughput:.2f} (from max time)")
+    print(f"  Max:    {max_throughput:.2f} (from min time)")
+    print(f"  Mean:   {mean_throughput:.2f} (approx, from mean time)")
+    print(f"  Median: {median_throughput:.2f} (approx, from median time)")
+
+    if true_times:
+        # Per-round throughputs give exact statistics instead of the
+        # approximations derived from aggregated times above.
+        throughputs = [total_mib / t for t in true_times]
+        true_min_throughput = min(throughputs)
+        true_max_throughput = max(throughputs)
+        true_mean_throughput = statistics.mean(throughputs)
+        true_median_throughput = statistics.median(throughputs)
+
+        benchmark.extra_info["true_throughput_MiB_s_min"] = true_min_throughput
+        benchmark.extra_info["true_throughput_MiB_s_max"] = true_max_throughput
+        benchmark.extra_info["true_throughput_MiB_s_mean"] = true_mean_throughput
+        benchmark.extra_info["true_throughput_MiB_s_median"] = true_median_throughput
+
+        print("\nThroughput Statistics from true_times (MiB/s):")
+        print(f"  Min:    {true_min_throughput:.2f}")
+        print(f"  Max:    {true_max_throughput:.2f}")
+        print(f"  Mean:   {true_mean_throughput:.2f}")
+        print(f"  Median: {true_median_throughput:.2f}")
+
+    # Get benchmark name, rounds, and iterations
+    name = benchmark.name
+    rounds = benchmark.stats["rounds"]
+    iterations = benchmark.stats["iterations"]
+
+    # Header for throughput table
+    separator = "-" * 125
+    header = "\n\n" + separator + "\n"
+    header += "Throughput Benchmark (MiB/s)\n"
+    header += separator + "\n"
+    header += f"{'Name':<50} {'Min':>10} {'Max':>10} {'Mean':>10} {'StdDev':>10} {'Median':>10} {'Rounds':>8} {'Iterations':>12}\n"
+    header += separator
+
+    # Data row for throughput table
+    # The table headers (Min, Max) refer to the throughput values.
+    # No standard deviation of throughput is computed, hence the 'N/A' cell.
+    row = f"{name:<50} {min_throughput:>10.4f} {max_throughput:>10.4f} {mean_throughput:>10.4f} {'N/A':>10} {median_throughput:>10.4f} {rounds:>8} {iterations:>12}"
+
+    print(header)
+    print(row)
+    print(separator)
+
+class RandomBytesIO(io.RawIOBase):
+    """
+    A read-only, seekable file-like object that generates random bytes.
+
+    Data is produced with os.urandom at read time and never stored, so
+    reading the same offset again after a seek yields different bytes. The
+    virtual file has a fixed size and construction enforces an upper safety
+    cap on that size.
+    """
+
+    # 10 GiB default safety cap
+    DEFAULT_CAP = 10 * 1024 * 1024 * 1024
+
+    def __init__(self, size, max_size=DEFAULT_CAP):
+        """
+        Args:
+            size (int): The exact size of the virtual file in bytes.
+            max_size (int): The maximum allowed size to prevent safety issues.
+
+        Raises:
+            ValueError: If ``size`` is None, negative, or above ``max_size``.
+        """
+        super().__init__()
+
+        if size is None:
+            raise ValueError("Size must be defined (cannot be infinite).")
+
+        if size < 0:
+            raise ValueError(f"Size must be non-negative, got {size}.")
+
+        if size > max_size:
+            # Report the limit actually in force; it is only 10 GiB when the
+            # default cap is used.
+            raise ValueError(
+                f"Requested size {size} exceeds the maximum limit of {max_size} bytes."
+            )
+
+        self._size = size
+        self._pos = 0
+
+    def read(self, n=-1):
+        """
+        Return up to ``n`` random bytes, never reading past the fixed size.
+
+        ``n`` of None or any negative value means "read to the end".
+        Returns ``b""`` once the position has reached the end.
+        """
+        remaining = self._size - self._pos
+
+        # EOF: nothing left to hand out.
+        if remaining <= 0:
+            return b""
+
+        # "Read all" requests and oversized requests are both clamped so the
+        # stream stops exactly at `size` bytes.
+        if n is None or n < 0 or n > remaining:
+            n = remaining
+
+        data = os.urandom(n)
+        self._pos += len(data)
+        return data
+
+    def readinto(self, b):
+        """
+        Fill the writable buffer ``b`` with random bytes.
+
+        Provided so that wrappers that pull data through ``readinto``
+        (e.g. io.BufferedReader) work with this stream.
+
+        Returns:
+            int: Number of bytes written into ``b`` (0 at end of file).
+        """
+        view = memoryview(b).cast("B")
+        data = self.read(len(view))
+        count = len(data)
+        view[:count] = data
+        return count
+
+    def readable(self):
+        return True
+
+    def seekable(self):
+        return True
+
+    def tell(self):
+        return self._pos
+
+    def seek(self, offset, whence=io.SEEK_SET):
+        """
+        Move the read position and return the new absolute position.
+
+        Out-of-range targets are clamped into [0, size] instead of raising.
+
+        Raises:
+            ValueError: If ``whence`` is not SEEK_SET, SEEK_CUR or SEEK_END.
+        """
+        if whence == io.SEEK_SET:
+            new_pos = offset
+        elif whence == io.SEEK_CUR:
+            new_pos = self._pos + offset
+        elif whence == io.SEEK_END:
+            new_pos = self._size + offset
+        else:
+            raise ValueError(f"Invalid whence: {whence}")
+
+        # Clamp position to valid range [0, size]
+        self._pos = max(0, min(new_pos, self._size))
+        return self._pos
@@ -0,0 +1,144 @@
+# Copyright 2026 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+from typing import Any
+from tests.perf.microbenchmarks.resource_monitor import ResourceMonitor
+import pytest
+from tests.system._helpers import delete_blob
+
+import asyncio
+import multiprocessing
+import os
+import uuid
+from google.cloud import storage
+from google.cloud.storage._experimental.asyncio.async_appendable_object_writer import (
+    AsyncAppendableObjectWriter,
+)
+from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
+
+_OBJECT_NAME_PREFIX = "micro-benchmark"
+
+
+@pytest.fixture(scope="function")
+def blobs_to_delete():
+    """
+    Yield a list that a test fills with blobs to remove afterwards.
+
+    Once the test finishes, every blob found in the list is passed to
+    ``delete_blob`` in insertion order.
+    """
+    pending = []
+
+    yield pending
+
+    # Teardown: runs after the test that requested the fixture.
+    for leftover in pending:
+        delete_blob(leftover)
+
+
+@pytest.fixture(scope="session")
+def storage_client():
+    """
+    Session-scoped storage client, closed when the session ends.
+    """
+    from google.cloud.storage import Client
+
+    session_client = Client()
+    try:
+        yield session_client
+    finally:
+        # Release the client once the fixture is torn down.
+        session_client.close()
+
+@pytest.fixture
+def monitor():
+    """
+    Hand the ResourceMonitor class itself (not an instance) to the test.
+
+    The test instantiates it where needed, e.g.:
+        with monitor() as m: ...
+    """
+    monitor_cls = ResourceMonitor
+    return monitor_cls
+
+def publish_resource_metrics(benchmark: Any, monitor: ResourceMonitor) -> None:
+    """
+    Copy the readings of a resource monitor into ``benchmark.extra_info``.
+
+    The CPU, memory and network readings are stored as strings formatted
+    with two decimals; the vCPU count is stored unchanged.
+    """
+    cpu_peak = f"{monitor.max_cpu:.2f}"
+    mem_peak = f"{monitor.max_mem:.2f}"
+    net_rate = f"{monitor.throughput_mb_s:.2f}"
+
+    # Collect everything first so extra_info is updated in a single call.
+    metrics = {
+        "cpu_max_global": cpu_peak,
+        "mem_max": mem_peak,
+        "net_throughput_mb_s": net_rate,
+        "vcpus": monitor.vcpus,
+    }
+    benchmark.extra_info.update(metrics)
+
+
+async def upload_appendable_object(bucket_name, object_name, object_size, chunk_size):
+    """
+    Append ``object_size`` random bytes to a new appendable object.
+
+    The data is sent in pieces of at most ``chunk_size`` bytes and the
+    writer is closed with ``finalize_on_close=True``. This helper only
+    prepares "appendable" objects that the read benchmarks consume, so the
+    performance of the write itself is not a concern here.
+
+    Returns:
+        The number of bytes appended.
+
+    Raises:
+        AssertionError: If the size reported on close differs from the
+            number of bytes appended.
+    """
+    # A flush interval a little over 1 GiB (1026 MiB) keeps the number of
+    # flushes to a minimum.
+    options = {"FLUSH_INTERVAL_BYTES": 1026 * 1024 ** 2}
+    grpc_client = AsyncGrpcClient().grpc_client
+    writer = AsyncAppendableObjectWriter(
+        grpc_client, bucket_name, object_name, writer_options=options
+    )
+    await writer.open()
+
+    # Count down the bytes still to send; the final piece may be shorter.
+    remaining = object_size
+    while remaining > 0:
+        step = min(chunk_size, remaining)
+        await writer.append(os.urandom(step))
+        remaining -= step
+    uploaded_bytes = object_size - remaining
+
+    object_metadata = await writer.close(finalize_on_close=True)
+    assert object_metadata.size == uploaded_bytes
+    return uploaded_bytes
+
+
+def upload_simple_object(bucket_name, object_name, object_size, chunk_size):
+    """
+    Upload ``object_size`` random bytes as a single object.
+
+    The whole payload is generated in memory with os.urandom and sent with
+    ``blob.upload_from_string``; ``chunk_size`` is applied to the blob
+    before uploading.
+
+    Returns:
+        ``object_size``, i.e. the number of bytes handed to the upload call.
+    """
+    # Every call builds its own client (this function is run by the pool
+    # workers started in _create_files), so close it once the upload is done
+    # instead of leaving it open.
+    with contextlib.closing(storage.Client()) as storage_client:
+        bucket = storage_client.bucket(bucket_name)
+        blob = bucket.blob(object_name)
+        blob.chunk_size = chunk_size
+        data = os.urandom(object_size)
+        blob.upload_from_string(data)
+    return object_size
+
+
+def _upload_worker(args):
+    """
+    Pool entry point: upload one object and report what was written.
+
+    ``args`` is a 5-tuple ``(bucket_name, object_name, object_size,
+    chunk_size, bucket_type)``. A "zonal" bucket type uses the async
+    appendable upload, driven by its own event loop; any other bucket type
+    uses the simple upload.
+
+    Returns:
+        Tuple ``(object_name, uploaded_bytes)``.
+    """
+    bucket_name, object_name, object_size, chunk_size, bucket_type = args
+
+    if bucket_type != "zonal":
+        written = upload_simple_object(bucket_name, object_name, object_size, chunk_size)
+        return object_name, written
+
+    upload = upload_appendable_object(bucket_name, object_name, object_size, chunk_size)
+    return object_name, asyncio.run(upload)
+
+
+def _create_files(num_files, bucket_name, bucket_type, object_size, chunk_size=1024 * 1024 * 1024):
+    """
+    Create/Upload objects for benchmarking and return a list of their names.
+
+    ``num_files`` objects of ``object_size`` bytes each are uploaded to
+    ``bucket_name`` in parallel by a "spawn" multiprocessing pool running
+    ``_upload_worker``.
+
+    Args:
+        num_files: Number of objects to create.
+        bucket_name: Destination bucket.
+        bucket_type: Forwarded to the worker to select the upload path.
+        object_size: Size of every object, in bytes.
+        chunk_size: Chunk size forwarded to the worker (default 1 GiB).
+
+    Returns:
+        The names of the uploaded objects, in creation order.
+
+    Raises:
+        AssertionError: If the total number of bytes reported by the workers
+            differs from ``object_size * num_files``.
+    """
+    # Use the full uuid4 hex for the suffix. A 5-character prefix has only
+    # 16**5 possible values, so two objects in a large batch could end up
+    # with the same name and overwrite each other.
+    args_list = [
+        (
+            bucket_name,
+            f"{_OBJECT_NAME_PREFIX}-{uuid.uuid4().hex}",
+            object_size,
+            chunk_size,
+            bucket_type,
+        )
+        for _ in range(num_files)
+    ]
+
+    ctx = multiprocessing.get_context("spawn")
+    with ctx.Pool() as pool:
+        # pool.map keeps the input order, so results line up with args_list.
+        results = pool.map(_upload_worker, args_list)
+
+    total_uploaded_bytes = sum(uploaded for _, uploaded in results)
+    assert total_uploaded_bytes == object_size * num_files
+
+    return [name for name, _ in results]
+
+
+@pytest.fixture
+def workload_params(request):
+    """
+    Upload the objects described by ``request.param`` and return both.
+
+    Returns:
+        Tuple ``(params, files_names)`` where ``files_names`` lists the
+        names of the objects created for this workload. Nothing is deleted
+        here; the fixture returns without a teardown step.
+    """
+    workload = request.param
+    created_names = _create_files(
+        num_files=workload.num_files,
+        bucket_name=workload.bucket_name,
+        bucket_type=workload.bucket_type,
+        object_size=workload.file_size_bytes,
+    )
+    return workload, created_names