From 0159f474c6bbc15f20d52bc946bd252bd852b196 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Tue, 30 Dec 2025 09:11:27 +0500 Subject: [PATCH 1/9] set up folder structure and base code --- openml/_api/__init__.py | 8 +++ openml/_api/config.py | 5 ++ openml/_api/http/__init__.py | 1 + openml/_api/http/client.py | 23 ++++++ openml/_api/http/utils.py | 0 openml/_api/resources/__init__.py | 2 + openml/_api/resources/base.py | 22 ++++++ openml/_api/resources/datasets.py | 13 ++++ openml/_api/resources/tasks.py | 113 ++++++++++++++++++++++++++++++ openml/_api/runtime/core.py | 58 +++++++++++++++ openml/_api/runtime/fallback.py | 5 ++ openml/tasks/functions.py | 8 ++- 12 files changed, 255 insertions(+), 3 deletions(-) create mode 100644 openml/_api/__init__.py create mode 100644 openml/_api/config.py create mode 100644 openml/_api/http/__init__.py create mode 100644 openml/_api/http/client.py create mode 100644 openml/_api/http/utils.py create mode 100644 openml/_api/resources/__init__.py create mode 100644 openml/_api/resources/base.py create mode 100644 openml/_api/resources/datasets.py create mode 100644 openml/_api/resources/tasks.py create mode 100644 openml/_api/runtime/core.py create mode 100644 openml/_api/runtime/fallback.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py new file mode 100644 index 000000000..5089f94dd --- /dev/null +++ b/openml/_api/__init__.py @@ -0,0 +1,8 @@ +from openml._api.runtime.core import APIContext + + +def set_api_version(version: str, strict=False): + api_context.set_version(version=version, strict=strict) + + +api_context = APIContext() diff --git a/openml/_api/config.py b/openml/_api/config.py new file mode 100644 index 000000000..bd93c3cad --- /dev/null +++ b/openml/_api/config.py @@ -0,0 +1,5 @@ +from __future__ import annotations + +API_V1_SERVER = "https://www.openml.org/api/v1/xml" +API_V2_SERVER = "http://127.0.0.1:8001" +API_KEY = "..." 
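A minimal usage sketch of the entry point this patch introduces (hypothetical caller code, not part of the diff; the task id 31 is an arbitrary example):

import openml._api as api

# The module-level context defaults to the v1 backend.
task = api.api_context.backend.tasks.get(31)

# Once the v2 resources and the FallbackProxy stub below are filled in,
# callers would opt in to the new API with:
api.set_api_version("v2", strict=True)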
diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py new file mode 100644 index 000000000..fde2a5b0a --- /dev/null +++ b/openml/_api/http/__init__.py @@ -0,0 +1 @@ +from openml._api.http.client import HTTPClient diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py new file mode 100644 index 000000000..81a9213e3 --- /dev/null +++ b/openml/_api/http/client.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import requests + +from openml.__version__ import __version__ + + +class HTTPClient: + def __init__(self, base_url: str): + self.base_url = base_url + self.headers = {"user-agent": f"openml-python/{__version__}"} + + def get(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.get(url, params=params, headers=self.headers) + + def post(self, path, data=None, files=None): + url = f"{self.base_url}/{path}" + return requests.post(url, data=data, files=files, headers=self.headers) + + def delete(self, path, params=None): + url = f"{self.base_url}/{path}" + return requests.delete(url, params=params, headers=self.headers) diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py new file mode 100644 index 000000000..e69de29bb diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py new file mode 100644 index 000000000..078fc5998 --- /dev/null +++ b/openml/_api/resources/__init__.py @@ -0,0 +1,2 @@ +from openml._api.resources.datasets import DatasetsV1, DatasetsV2 +from openml._api.resources.tasks import TasksV1, TasksV2 diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py new file mode 100644 index 000000000..1fae27665 --- /dev/null +++ b/openml/_api/resources/base.py @@ -0,0 +1,22 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.http import HTTPClient + + +class ResourceAPI: + def __init__(self, http: HTTPClient): + self._http = http + + +class DatasetsAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... + + +class TasksAPI(ResourceAPI, ABC): + @abstractmethod + def get(self, id: int) -> dict: ... diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py new file mode 100644 index 000000000..cd1bb595a --- /dev/null +++ b/openml/_api/resources/datasets.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from openml._api.resources.base import DatasetsAPI + + +class DatasetsV1(DatasetsAPI): + def get(self, id): + pass + + +class DatasetsV2(DatasetsAPI): + def get(self, id): + pass diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py new file mode 100644 index 000000000..b0e9afbf8 --- /dev/null +++ b/openml/_api/resources/tasks.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import xmltodict + +from openml._api.resources.base import TasksAPI +from openml.tasks.task import ( + OpenMLClassificationTask, + OpenMLClusteringTask, + OpenMLLearningCurveTask, + OpenMLRegressionTask, + OpenMLTask, + TaskType, +) + + +class TasksV1(TasksAPI): + def get(self, id, return_response=False): + path = f"task/{id}" + response = self._http.get(path) + xml_content = response.content + task = self._create_task_from_xml(xml_content) + + if return_response: + return task, response + + return task + + def _create_task_from_xml(self, xml: str) -> OpenMLTask: + """Create a task given a xml string. + + Parameters + ---------- + xml : string + Task xml representation. 
+ + Returns + ------- + OpenMLTask + """ + dic = xmltodict.parse(xml)["oml:task"] + estimation_parameters = {} + inputs = {} + # Due to the unordered structure we obtain, we first have to extract + # the possible keys of oml:input; dic["oml:input"] is a list of + # OrderedDicts + + # Check if there is a list of inputs + if isinstance(dic["oml:input"], list): + for input_ in dic["oml:input"]: + name = input_["@name"] + inputs[name] = input_ + # Single input case + elif isinstance(dic["oml:input"], dict): + name = dic["oml:input"]["@name"] + inputs[name] = dic["oml:input"] + + evaluation_measures = None + if "evaluation_measures" in inputs: + evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][ + "oml:evaluation_measure" + ] + + task_type = TaskType(int(dic["oml:task_type_id"])) + common_kwargs = { + "task_id": dic["oml:task_id"], + "task_type": dic["oml:task_type"], + "task_type_id": task_type, + "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"], + "evaluation_measure": evaluation_measures, + } + # TODO: add OpenMLClusteringTask? + if task_type in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + # Convert some more parameters + for parameter in inputs["estimation_procedure"]["oml:estimation_procedure"][ + "oml:parameter" + ]: + name = parameter["@name"] + text = parameter.get("#text", "") + estimation_parameters[name] = text + + common_kwargs["estimation_procedure_type"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:type"] + common_kwargs["estimation_procedure_id"] = int( + inputs["estimation_procedure"]["oml:estimation_procedure"]["oml:id"] + ) + + common_kwargs["estimation_parameters"] = estimation_parameters + common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][ + "oml:target_feature" + ] + common_kwargs["data_splits_url"] = inputs["estimation_procedure"][ + "oml:estimation_procedure" + ]["oml:data_splits_url"] + + cls = { + TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }.get(task_type) + if cls is None: + raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.") + return cls(**common_kwargs) # type: ignore + + +class TasksV2(TasksAPI): + def get(self, id): + pass diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py new file mode 100644 index 000000000..80f35587c --- /dev/null +++ b/openml/_api/runtime/core.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +from openml._api.config import ( + API_V1_SERVER, + API_V2_SERVER, +) +from openml._api.http.client import HTTPClient +from openml._api.resources import ( + DatasetsV1, + DatasetsV2, + TasksV1, + TasksV2, +) +from openml._api.runtime.fallback import FallbackProxy + + +class APIBackend: + def __init__(self, *, datasets, tasks): + self.datasets = datasets + self.tasks = tasks + + +def build_backend(version: str, strict: bool) -> APIBackend: + v1_http = HTTPClient(API_V1_SERVER) + v2_http = HTTPClient(API_V2_SERVER) + + v1 = APIBackend( + datasets=DatasetsV1(v1_http), + tasks=TasksV1(v1_http), + ) + + if version == "v1": + return v1 + + v2 = APIBackend( + datasets=DatasetsV2(v2_http), + tasks=TasksV2(v2_http), + ) + + if strict: + return v2 + + return APIBackend( + datasets=FallbackProxy(v2.datasets, v1.datasets), + tasks=FallbackProxy(v2.tasks, v1.tasks), + ) + + +class 
APIContext: + def __init__(self): + self._backend = build_backend("v1", strict=False) + + def set_version(self, version: str, strict: bool = False): + self._backend = build_backend(version, strict) + + @property + def backend(self): + return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py new file mode 100644 index 000000000..56e96a966 --- /dev/null +++ b/openml/_api/runtime/fallback.py @@ -0,0 +1,5 @@ +from __future__ import annotations + + +class FallbackProxy: + pass diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index d2bf5e946..91be65965 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -12,6 +12,7 @@ import openml._api_calls import openml.utils +from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -442,11 +443,12 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") + task, response = api_context.backend.tasks.get(task_id, return_response=True) with xml_file.open("w", encoding="utf8") as fh: - fh.write(task_xml) - return _create_task_from_xml(task_xml) + fh.write(response.text) + + return task def _create_task_from_xml(xml: str) -> OpenMLTask: From 52ef37999fad8509e5e85b8512e442bd9dc69e04 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Mon, 5 Jan 2026 12:48:58 +0500 Subject: [PATCH 2/9] fix pre-commit --- openml/_api/__init__.py | 2 +- openml/_api/http/__init__.py | 2 ++ openml/_api/http/client.py | 32 +++++++++++++++++++++++-------- openml/_api/resources/__init__.py | 2 ++ openml/_api/resources/base.py | 13 +++++++++++-- openml/_api/resources/datasets.py | 15 +++++++++++---- openml/_api/resources/tasks.py | 25 +++++++++++++++++++----- openml/_api/runtime/__init__.py | 0 openml/_api/runtime/core.py | 23 +++++++++++----------- openml/_api/runtime/fallback.py | 9 ++++++++- openml/tasks/functions.py | 12 ++++++++---- 11 files changed, 99 insertions(+), 36 deletions(-) create mode 100644 openml/_api/runtime/__init__.py diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py index 5089f94dd..881f40671 100644 --- a/openml/_api/__init__.py +++ b/openml/_api/__init__.py @@ -1,7 +1,7 @@ from openml._api.runtime.core import APIContext -def set_api_version(version: str, strict=False): +def set_api_version(version: str, *, strict: bool = False) -> None: api_context.set_version(version=version, strict=strict) diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py index fde2a5b0a..8e6d1e4ce 100644 --- a/openml/_api/http/__init__.py +++ b/openml/_api/http/__init__.py @@ -1 +1,3 @@ from openml._api.http.client import HTTPClient + +__all__ = ["HTTPClient"] diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 81a9213e3..dea5de809 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,23 +1,39 @@ from __future__ import annotations +from typing import Any, Mapping + import requests +from requests import Response from openml.__version__ import __version__ class HTTPClient: - def __init__(self, base_url: str): + def __init__(self, base_url: str) -> None: self.base_url = base_url - self.headers = {"user-agent": f"openml-python/{__version__}"} + self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def get(self, path, 
params=None): + def get( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.get(url, params=params, headers=self.headers) + return requests.get(url, params=params, headers=self.headers, timeout=10) - def post(self, path, data=None, files=None): + def post( + self, + path: str, + data: Mapping[str, Any] | None = None, + files: Any = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.post(url, data=data, files=files, headers=self.headers) + return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) - def delete(self, path, params=None): + def delete( + self, + path: str, + params: Mapping[str, Any] | None = None, + ) -> Response: url = f"{self.base_url}/{path}" - return requests.delete(url, params=params, headers=self.headers) + return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py index 078fc5998..b1af3c1a8 100644 --- a/openml/_api/resources/__init__.py +++ b/openml/_api/resources/__init__.py @@ -1,2 +1,4 @@ from openml._api.resources.datasets import DatasetsV1, DatasetsV2 from openml._api.resources.tasks import TasksV1, TasksV2 + +__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"] diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index 1fae27665..6fbf8977d 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -4,7 +4,11 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: + from requests import Response + from openml._api.http import HTTPClient + from openml.datasets.dataset import OpenMLDataset + from openml.tasks.task import OpenMLTask class ResourceAPI: @@ -14,9 +18,14 @@ def __init__(self, http: HTTPClient): class DatasetsAPI(ResourceAPI, ABC): @abstractmethod - def get(self, id: int) -> dict: ... + def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]: ... class TasksAPI(ResourceAPI, ABC): @abstractmethod - def get(self, id: int) -> dict: ... + def get( + self, + task_id: int, + *, + return_response: bool = False, + ) -> OpenMLTask | tuple[OpenMLTask, Response]: ... 
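A short sketch of how a caller can narrow the `OpenMLTask | tuple[OpenMLTask, Response]` union declared above; it mirrors the `_get_task_description` change later in this patch (`backend` is assumed to be an `APIBackend` instance, the task id is arbitrary):

result = backend.tasks.get(31, return_response=True)
if isinstance(result, tuple):
    task, response = result  # raw Response kept so the task XML can be cached
else:
    task = result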
diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py
index cd1bb595a..9ff1ec278 100644
--- a/openml/_api/resources/datasets.py
+++ b/openml/_api/resources/datasets.py
@@ -1,13 +1,20 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 from openml._api.resources.base import DatasetsAPI
 
+if TYPE_CHECKING:
+    from requests import Response
+
+    from openml.datasets.dataset import OpenMLDataset
+
 
 class DatasetsV1(DatasetsAPI):
-    def get(self, id):
-        pass
+    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
+        raise NotImplementedError
 
 
 class DatasetsV2(DatasetsAPI):
-    def get(self, id):
-        pass
+    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
+        raise NotImplementedError
diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py
index b0e9afbf8..f494fb9a3 100644
--- a/openml/_api/resources/tasks.py
+++ b/openml/_api/resources/tasks.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 import xmltodict
 
 from openml._api.resources.base import TasksAPI
@@ -12,12 +14,20 @@
     TaskType,
 )
 
+if TYPE_CHECKING:
+    from requests import Response
+
 
 class TasksV1(TasksAPI):
-    def get(self, id, return_response=False):
-        path = f"task/{id}"
+    def get(
+        self,
+        task_id: int,
+        *,
+        return_response: bool = False,
+    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
+        path = f"task/{task_id}"
         response = self._http.get(path)
-        xml_content = response.content
+        xml_content = response.text
         task = self._create_task_from_xml(xml_content)
 
         if return_response:
@@ -109,5 +119,10 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask:
 
 
 class TasksV2(TasksAPI):
-    def get(self, id):
-        pass
+    def get(
+        self,
+        task_id: int,
+        *,
+        return_response: bool = False,
+    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
+        raise NotImplementedError
diff --git a/openml/_api/runtime/__init__.py b/openml/_api/runtime/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py
index 80f35587c..aa09a69db 100644
--- a/openml/_api/runtime/core.py
+++ b/openml/_api/runtime/core.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from typing import TYPE_CHECKING
+
 from openml._api.config import (
     API_V1_SERVER,
     API_V2_SERVER,
@@ -11,16 +13,18 @@
     TasksV1,
     TasksV2,
 )
-from openml._api.runtime.fallback import FallbackProxy
+
+if TYPE_CHECKING:
+    from openml._api.resources.base import DatasetsAPI, TasksAPI
 
 
 class APIBackend:
-    def __init__(self, *, datasets, tasks):
+    def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI):
         self.datasets = datasets
         self.tasks = tasks
 
 
-def build_backend(version: str, strict: bool) -> APIBackend:
+def build_backend(version: str, *, strict: bool) -> APIBackend:
     v1_http = HTTPClient(API_V1_SERVER)
     v2_http = HTTPClient(API_V2_SERVER)
 
@@ -40,19 +44,16 @@ def build_backend(version: str, strict: bool) -> APIBackend:
     if strict:
         return v2
 
-    return APIBackend(
-        datasets=FallbackProxy(v2.datasets, v1.datasets),
-        tasks=FallbackProxy(v2.tasks, v1.tasks),
-    )
+    return v1
 
 
 class APIContext:
-    def __init__(self):
+    def __init__(self) -> None:
         self._backend = build_backend("v1", strict=False)
 
-    def set_version(self, version: str, strict: bool = False):
-        self._backend = build_backend(version, strict)
+    def set_version(self, version: str, *, strict: bool = False) -> None:
+        self._backend = build_backend(version=version, strict=strict)
 
     @property
-    def backend(self):
+    def backend(self) -> 
APIBackend: return self._backend diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py index 56e96a966..1bc99d270 100644 --- a/openml/_api/runtime/fallback.py +++ b/openml/_api/runtime/fallback.py @@ -1,5 +1,12 @@ from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from openml._api.resources.base import ResourceAPI + class FallbackProxy: - pass + def __init__(self, primary: ResourceAPI, fallback: ResourceAPI): + self._primary = primary + self._fallback = fallback diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index ef67f75bf..a794ad56d 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -445,10 +445,14 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - task, response = api_context.backend.tasks.get(task_id, return_response=True) - - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) + result = api_context.backend.tasks.get(task_id, return_response=True) + + if isinstance(result, tuple): + task, response = result + with xml_file.open("w", encoding="utf8") as fh: + fh.write(response.text) + else: + task = result return task From 5dfcbce55a027d19cd502ea7bb3d521c2b1bca29 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:14:31 +0500 Subject: [PATCH 3/9] refactor --- openml/_api/config.py | 62 +++++++++++++++++++++++++++++++++++-- openml/_api/http/client.py | 18 +++++++---- openml/_api/runtime/core.py | 9 ++---- 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index bd93c3cad..1431f66b1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -1,5 +1,61 @@ from __future__ import annotations -API_V1_SERVER = "https://www.openml.org/api/v1/xml" -API_V2_SERVER = "http://127.0.0.1:8001" -API_KEY = "..." 
+from dataclasses import dataclass +from typing import Literal + +DelayMethod = Literal["human", "robot"] + + +@dataclass +class APIConfig: + server: str + base_url: str + key: str + + +@dataclass +class APISettings: + v1: APIConfig + v2: APIConfig + + +@dataclass +class ConnectionConfig: + retries: int = 3 + delay_method: DelayMethod = "human" + delay_time: int = 1 # seconds + + def __post_init__(self) -> None: + if self.delay_method not in ("human", "robot"): + raise ValueError(f"delay_method must be 'human' or 'robot', got {self.delay_method}") + + +@dataclass +class CacheConfig: + dir: str = "~/.openml/cache" + ttl: int = 60 * 60 * 24 * 7 # one week + + +@dataclass +class Settings: + api: APISettings + connection: ConnectionConfig + cache: CacheConfig + + +settings = Settings( + api=APISettings( + v1=APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + key="...", + ), + v2=APIConfig( + server="http://127.0.0.1:8001/", + base_url="", + key="...", + ), + ), + connection=ConnectionConfig(), + cache=CacheConfig(), +) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index dea5de809..74e08c709 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,24 +1,30 @@ from __future__ import annotations -from typing import Any, Mapping +from typing import TYPE_CHECKING, Any, Mapping import requests from requests import Response from openml.__version__ import __version__ +if TYPE_CHECKING: + from openml._api.config import APIConfig + class HTTPClient: - def __init__(self, base_url: str) -> None: - self.base_url = base_url + def __init__(self, config: APIConfig) -> None: + self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} + def _create_url(self, path: str) -> str: + return self.config.server + self.config.base_url + path + def get( self, path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.get(url, params=params, headers=self.headers, timeout=10) def post( @@ -27,7 +33,7 @@ def post( data: Mapping[str, Any] | None = None, files: Any = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) def delete( @@ -35,5 +41,5 @@ def delete( path: str, params: Mapping[str, Any] | None = None, ) -> Response: - url = f"{self.base_url}/{path}" + url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py index aa09a69db..98b587411 100644 --- a/openml/_api/runtime/core.py +++ b/openml/_api/runtime/core.py @@ -2,10 +2,7 @@ from typing import TYPE_CHECKING -from openml._api.config import ( - API_V1_SERVER, - API_V2_SERVER, -) +from openml._api.config import settings from openml._api.http.client import HTTPClient from openml._api.resources import ( DatasetsV1, @@ -25,8 +22,8 @@ def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI): def build_backend(version: str, *, strict: bool) -> APIBackend: - v1_http = HTTPClient(API_V1_SERVER) - v2_http = HTTPClient(API_V2_SERVER) + v1_http = HTTPClient(config=settings.api.v1) + v2_http = HTTPClient(config=settings.api.v2) v1 = APIBackend( datasets=DatasetsV1(v1_http), From 2acbe9992cf95bfc103ff4fa0c360a58c1842870 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 22:24:03 +0500 Subject: [PATCH 4/9] implement cache_dir --- 
openml/_api/http/client.py | 74 +++++++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 8 deletions(-) diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 74e08c709..49b05c88e 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -1,36 +1,93 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping +from pathlib import Path +from typing import TYPE_CHECKING, Any +from urllib.parse import urlencode, urljoin, urlparse import requests from requests import Response from openml.__version__ import __version__ +from openml._api.config import settings if TYPE_CHECKING: from openml._api.config import APIConfig -class HTTPClient: +class CacheMixin: + @property + def dir(self) -> str: + return settings.cache.dir + + @property + def ttl(self) -> int: + return settings.cache.ttl + + def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + parsed_url = urlparse(url) + netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain + path_parts = parsed_url.path.strip("/").split("/") + + # remove api_key and serialize params if any + filtered_params = {k: v for k, v in params.items() if k != "api_key"} + params_part = [urlencode(filtered_params)] if filtered_params else [] + + return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) + + def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 + return None + + def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + return None + + +class HTTPClient(CacheMixin): def __init__(self, config: APIConfig) -> None: self.config = config self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"} - def _create_url(self, path: str) -> str: - return self.config.server + self.config.base_url + path + @property + def server(self) -> str: + return self.config.server + + @property + def base_url(self) -> str: + return self.config.base_url + + def _create_url(self, path: str) -> Any: + return urljoin(self.server, urljoin(self.base_url, path)) def get( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, + use_cache: bool = False, + use_api_key: bool = False, ) -> Response: url = self._create_url(path) - return requests.get(url, params=params, headers=self.headers, timeout=10) + params = dict(params) if params is not None else {} + + if use_api_key: + params["api_key"] = self.config.key + + if use_cache: + response = self._get_cache_response(url, params) + if response: + return response + + response = requests.get(url, params=params, headers=self.headers, timeout=10) + + if use_cache: + self._set_cache_response(url, params, response) + + return response def post( self, path: str, - data: Mapping[str, Any] | None = None, + *, + data: dict[str, Any] | None = None, files: Any = None, ) -> Response: url = self._create_url(path) @@ -39,7 +96,8 @@ def post( def delete( self, path: str, - params: Mapping[str, Any] | None = None, + *, + params: dict[str, Any] | None = None, ) -> Response: url = self._create_url(path) return requests.delete(url, params=params, headers=self.headers, timeout=10) From af99880a9e16a49833c63084c9e9267c112b6b91 Mon Sep 17 00:00:00 2001 From: geetu040 Date: Wed, 7 Jan 2026 23:42:17 +0500 Subject: [PATCH 5/9] refactor --- openml/_api/config.py | 1 + openml/_api/http/client.py | 100 +++++++++++++++++++++++++++---------- 2 files changed, 75 insertions(+), 26 
deletions(-) diff --git a/openml/_api/config.py b/openml/_api/config.py index 1431f66b1..848fe8da1 100644 --- a/openml/_api/config.py +++ b/openml/_api/config.py @@ -11,6 +11,7 @@ class APIConfig: server: str base_url: str key: str + timeout: int = 10 # seconds @dataclass diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py index 49b05c88e..a90e93933 100644 --- a/openml/_api/http/client.py +++ b/openml/_api/http/client.py @@ -23,7 +23,7 @@ def dir(self) -> str: def ttl(self) -> int: return settings.cache.ttl - def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: + def _get_cache_dir(self, url: str, params: dict[str, Any]) -> Path: parsed_url = urlparse(url) netloc_parts = parsed_url.netloc.split(".")[::-1] # reverse domain path_parts = parsed_url.path.strip("/").split("/") @@ -34,10 +34,10 @@ def _get_cache_directory(self, url: str, params: dict[str, Any]) -> Path: return Path(self.dir).joinpath(*netloc_parts, *path_parts, *params_part) - def _get_cache_response(self, url: str, params: dict[str, Any]) -> Response | None: # noqa: ARG002 - return None + def _get_cache_response(self, cache_dir: Path) -> Response: # noqa: ARG002 + return Response() - def _set_cache_response(self, url: str, params: dict[str, Any], response: Response) -> None: # noqa: ARG002 + def _set_cache_response(self, cache_dir: Path, response: Response) -> None: # noqa: ARG002 return None @@ -54,50 +54,98 @@ def server(self) -> str: def base_url(self) -> str: return self.config.base_url - def _create_url(self, path: str) -> Any: - return urljoin(self.server, urljoin(self.base_url, path)) + @property + def key(self) -> str: + return self.config.key - def get( + @property + def timeout(self) -> int: + return self.config.timeout + + def request( self, + method: str, path: str, *, - params: dict[str, Any] | None = None, use_cache: bool = False, use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - params = dict(params) if params is not None else {} + url = urljoin(self.server, urljoin(self.base_url, path)) + params = request_kwargs.pop("params", {}) + params = params.copy() if use_api_key: - params["api_key"] = self.config.key + params["api_key"] = self.key - if use_cache: - response = self._get_cache_response(url, params) - if response: - return response + headers = request_kwargs.pop("headers", {}) + headers = headers.copy() + headers.update(self.headers) + + timeout = request_kwargs.pop("timeout", self.timeout) + cache_dir = self._get_cache_dir(url, params) - response = requests.get(url, params=params, headers=self.headers, timeout=10) + if use_cache: + try: + return self._get_cache_response(cache_dir) + # TODO: handle ttl expired error + except Exception: + raise + + response = requests.request( + method=method, + url=url, + params=params, + headers=headers, + timeout=timeout, + **request_kwargs, + ) if use_cache: - self._set_cache_response(url, params, response) + self._set_cache_response(cache_dir, response) return response - def post( + def get( self, path: str, *, - data: dict[str, Any] | None = None, - files: Any = None, + use_cache: bool = False, + use_api_key: bool = False, + **request_kwargs: Any, ) -> Response: - url = self._create_url(path) - return requests.post(url, data=data, files=files, headers=self.headers, timeout=10) + # TODO: remove override when cache is implemented + use_cache = False + return self.request( + method="GET", + path=path, + use_cache=use_cache, + use_api_key=use_api_key, + **request_kwargs, + ) + + def 
post(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="POST",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

    def delete(
        self,
        path: str,
        **request_kwargs: Any,
    ) -> Response:
        return self.request(
            method="DELETE",
            path=path,
            use_cache=False,
            use_api_key=True,
            **request_kwargs,
        )

From 17a71783ce09e8df847a854e884e32797b919a3c Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Fri, 9 Jan 2026 14:34:25 +0530
Subject: [PATCH 6/9] ported functions to APIv1

---
 .../Advanced/fetch_evaluations_tutorial.py |   2 +-
 examples/Basics/introduction_tutorial.py |   2 +-
 .../2018_kdd_rijn_example.py |   7 +-
 .../fetch_runtimes_tutorial.py |   1 +
 openml/_api/resources/tasks.py | 577 +++++++++++++++++-
 openml/tasks/functions.py |  23 +-
 6 files changed, 577 insertions(+), 35 deletions(-)

diff --git a/examples/Advanced/fetch_evaluations_tutorial.py b/examples/Advanced/fetch_evaluations_tutorial.py
index 1b759423b..b6cee9ab7 100644
--- a/examples/Advanced/fetch_evaluations_tutorial.py
+++ b/examples/Advanced/fetch_evaluations_tutorial.py
@@ -178,4 +178,4 @@ def plot_flow_compare(evaluations, top_n=10, metric="predictive_accuracy"):
     function="predictive_accuracy", flows=[6767], size=100, parameters_in_separate_columns=True
 )
 
-print(evals_setups.head(10))
\ No newline at end of file
+print(evals_setups.head(10))
diff --git a/examples/Basics/introduction_tutorial.py b/examples/Basics/introduction_tutorial.py
index c864772f5..4b972b95b 100644
--- a/examples/Basics/introduction_tutorial.py
+++ b/examples/Basics/introduction_tutorial.py
@@ -52,4 +52,4 @@
 # %%
 import openml
 
-openml.config.set_root_cache_directory("YOURDIR")
\ No newline at end of file
+openml.config.set_root_cache_directory("YOURDIR")
diff --git a/examples/_external_or_deprecated/2018_kdd_rijn_example.py b/examples/_external_or_deprecated/2018_kdd_rijn_example.py
index 6522013e3..c6c069d6a 100644
--- a/examples/_external_or_deprecated/2018_kdd_rijn_example.py
+++ b/examples/_external_or_deprecated/2018_kdd_rijn_example.py
@@ -49,7 +49,6 @@
 
 import openml
 
-
 ##############################################################################
 # With the advent of automated machine learning, automated hyperparameter
 # optimization methods are by now routinely used in data mining. 
However, this @@ -121,7 +120,7 @@ [ dict( **{name: json.loads(value) for name, value in setup["parameters"].items()}, - **{performance_column: setup[performance_column]} + **{performance_column: setup[performance_column]}, ) for _, setup in evals.iterrows() ] @@ -161,7 +160,9 @@ fanova_results.append( { "hyperparameter": pname.split(".")[-1], - "fanova": evaluator.quantify_importance([idx])[(idx,)]["individual importance"], + "fanova": evaluator.quantify_importance([idx])[(idx,)][ + "individual importance" + ], } ) except RuntimeError as e: diff --git a/examples/_external_or_deprecated/fetch_runtimes_tutorial.py b/examples/_external_or_deprecated/fetch_runtimes_tutorial.py index b2a3f1d2a..ff3132c89 100644 --- a/examples/_external_or_deprecated/fetch_runtimes_tutorial.py +++ b/examples/_external_or_deprecated/fetch_runtimes_tutorial.py @@ -284,6 +284,7 @@ def print_compare_runtimes(measures): # %% + def extract_refit_time(run, repeat, fold): refit_time = ( run.fold_evaluations["wall_clock_time_millis"][repeat][fold] diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index f494fb9a3..2305ef0cd 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -1,40 +1,123 @@ from __future__ import annotations -from typing import TYPE_CHECKING +import warnings +from functools import partial +from typing import Any +import pandas as pd import xmltodict +import openml.utils from openml._api.resources.base import TasksAPI +from openml.datasets import get_dataset +from openml.exceptions import OpenMLCacheException from openml.tasks.task import ( OpenMLClassificationTask, OpenMLClusteringTask, OpenMLLearningCurveTask, OpenMLRegressionTask, + OpenMLSupervisedTask, OpenMLTask, TaskType, ) -if TYPE_CHECKING: - from requests import Response +TASKS_CACHE_DIR_NAME = "tasks" class TasksV1(TasksAPI): - def get( + @openml.utils.thread_safe_if_oslo_installed + def get_task( self, task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - path = f"task/{task_id}" - response = self._http.get(path) - xml_content = response.text - task = self._create_task_from_xml(xml_content) + download_splits: bool = False, # noqa: FBT001, FBT002 + **get_dataset_kwargs: Any, + ) -> OpenMLTask: + """Download OpenML task for a given task ID. - if return_response: - return task, response + Downloads the task representation. + + Use the `download_splits` parameter to control whether the splits are downloaded. + Moreover, you may pass additional parameter (args or kwargs) that are passed to + :meth:`openml.datasets.get_dataset`. + + Parameters + ---------- + task_id : int + The OpenML task id of the task to download. + download_splits: bool (default=False) + Whether to download the splits as well. + get_dataset_kwargs : + Args and kwargs can be used pass optional parameters to + :meth:`openml.datasets.get_dataset`. 
+
+        Returns
+        -------
+        task: OpenMLTask
+        """
+        if not isinstance(task_id, int):
+            raise TypeError(f"Task id should be integer, is {type(task_id)}")
+
+        cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
+        tid_cache_dir = cache_key_dir / str(task_id)
+        tid_cache_dir_existed = tid_cache_dir.exists()
+        try:
+            task = self._get_task_description(task_id)
+            dataset = get_dataset(task.dataset_id, **get_dataset_kwargs)
+            # List of class labels available in dataset description
+            # Including class labels as part of task meta data handles
+            # the case where data download was initially disabled
+            if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)):
+                task.class_labels = dataset.retrieve_class_labels(task.target_name)
+            # Clustering tasks do not have class labels
+            # and do not offer download_split
+            if download_splits and isinstance(task, OpenMLSupervisedTask):
+                task.download_split()
+        except Exception as e:
+            if not tid_cache_dir_existed:
+                openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+            raise e
+
+        return task
+
+    def _get_cached_task(self, tid: int) -> OpenMLTask:
+        """Return a cached task based on the given id.
+
+        Parameters
+        ----------
+        tid : int
+            Id of the task.
+
+        Returns
+        -------
+        OpenMLTask
+        """
+        tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, tid)
+
+        task_xml_path = tid_cache_dir / "task.xml"
+        try:
+            with task_xml_path.open(encoding="utf8") as fh:
+                return self._create_task_from_xml(fh.read())
+        except OSError as e:
+            openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir)
+            raise OpenMLCacheException(f"Task file for tid {tid} not cached") from e
+
+    def _get_task_description(self, task_id: int) -> OpenMLTask:
+        try:
+            return self._get_cached_task(task_id)
+        except OpenMLCacheException:
+            _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
+            xml_file = _cache_dir / "task.xml"
+            response = self._http.get(f"task/{task_id}")
+            task = self._create_task_from_xml(response.text)
+
+            with xml_file.open("w", encoding="utf8") as fh:
+                fh.write(response.text)
+
+            return task
+
     def _create_task_from_xml(self, xml: str) -> OpenMLTask:
         """Create a task given a xml string.
 
@@ -117,12 +200,470 @@ def _create_task_from_xml(self, xml: str) -> OpenMLTask:
             raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
         return cls(**common_kwargs)  # type: ignore
 
+    def list_tasks(  # noqa: PLR0913
+        self,
+        task_type: TaskType | None = None,
+        offset: int | None = None,
+        size: int | None = None,
+        tag: str | None = None,
+        data_tag: str | None = None,
+        status: str | None = None,
+        data_name: str | None = None,
+        data_id: int | None = None,
+        number_instances: int | None = None,
+        number_features: int | None = None,
+        number_classes: int | None = None,
+        number_missing_values: int | None = None,
+    ) -> pd.DataFrame:
+        """
+        Return a number of tasks having the given tag and task_type
+
+        Parameters
+        ----------
+        Filter task_type is separated from the other filters because
+        it is used as task_type in the task description, but it is named
+        type when used as a filter in list tasks call.
+        offset : int, optional
+            the number of tasks to skip, starting from the first
+        task_type : TaskType, optional
+            Refers to the type of task. 
+ size : int, optional + the maximum number of tasks to show + tag : str, optional + the tag to include + data_tag : str, optional + the tag of the dataset + data_id : int, optional + status : str, optional + data_name : str, optional + number_instances : int, optional + number_features : int, optional + number_classes : int, optional + number_missing_values : int, optional + + Returns + ------- + dataframe + All tasks having the given task_type and the give tag. Every task is + represented by a row in the data frame containing the following information + as columns: task id, dataset id, task_type and status. If qualities are + calculated for the associated dataset, some of these are also returned. + """ + listing_call = partial( + self._list_tasks, + task_type=task_type, + tag=tag, + data_tag=data_tag, + status=status, + data_id=data_id, + data_name=data_name, + number_instances=number_instances, + number_features=number_features, + number_classes=number_classes, + number_missing_values=number_missing_values, + ) + batches = openml.utils._list_all(listing_call, offset=offset, limit=size) + if len(batches) == 0: + return pd.DataFrame() + + return pd.concat(batches) + + def _list_tasks( + self, + limit: int, + offset: int, + task_type: TaskType | int | None = None, + **kwargs: Any, + ) -> pd.DataFrame: + """ + Perform the api call to return a number of tasks having the given filters. + + Parameters + ---------- + Filter task_type is separated from the other filters because + it is used as task_type in the task description, but it is named + type when used as a filter in list tasks call. + limit: int + offset: int + task_type : TaskType, optional + Refers to the type of task. + kwargs: dict, optional + Legal filter operators: tag, task_id (list), data_tag, status, limit, + offset, data_id, data_name, number_instances, number_features, + number_classes, number_missing_values. + + Returns + ------- + dataframe + """ + api_call = "task/list" + if limit is not None: + api_call += f"/limit/{limit}" + if offset is not None: + api_call += f"/offset/{offset}" + if task_type is not None: + tvalue = task_type.value if isinstance(task_type, TaskType) else task_type + api_call += f"/type/{tvalue}" + if kwargs is not None: + for operator, value in kwargs.items(): + if value is not None: + if operator == "task_id": + value = ",".join([str(int(i)) for i in value]) # noqa: PLW2901 + api_call += f"/{operator}/{value}" + + return self.__list_tasks(api_call=api_call) + + def __list_tasks(self, api_call: str) -> pd.DataFrame: # noqa: C901, PLR0912 + """Returns a Pandas DataFrame with information about OpenML tasks. + + Parameters + ---------- + api_call : str + The API call specifying which tasks to return. + + Returns + ------- + A Pandas DataFrame with information about OpenML tasks. + + Raises + ------ + ValueError + If the XML returned by the OpenML API does not contain 'oml:tasks', '@xmlns:oml', + or has an incorrect value for '@xmlns:oml'. + KeyError + If an invalid key is found in the XML for a task. 
+ """ + xml_string = openml._api_calls._perform_api_call(api_call, "get") + tasks_dict = xmltodict.parse(xml_string, force_list=("oml:task", "oml:input")) + # Minimalistic check if the XML is useful + if "oml:tasks" not in tasks_dict: + raise ValueError(f'Error in return XML, does not contain "oml:runs": {tasks_dict}') + + if "@xmlns:oml" not in tasks_dict["oml:tasks"]: + raise ValueError( + f'Error in return XML, does not contain "oml:runs"/@xmlns:oml: {tasks_dict}' + ) + + if tasks_dict["oml:tasks"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + '"oml:runs"/@xmlns:oml is not ' + f'"http://openml.org/openml": {tasks_dict!s}', + ) + + assert isinstance(tasks_dict["oml:tasks"]["oml:task"], list), type(tasks_dict["oml:tasks"]) + + tasks = {} + procs = self._get_estimation_procedure_list() + proc_dict = {x["id"]: x for x in procs} + + for task_ in tasks_dict["oml:tasks"]["oml:task"]: + tid = None + try: + tid = int(task_["oml:task_id"]) + task_type_int = int(task_["oml:task_type_id"]) + try: + task_type_id = TaskType(task_type_int) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + stacklevel=2, + ) + continue + + task = { + "tid": tid, + "ttid": task_type_id, + "did": int(task_["oml:did"]), + "name": task_["oml:name"], + "task_type": task_["oml:task_type"], + "status": task_["oml:status"], + } + + # Other task inputs + for _input in task_.get("oml:input", []): + if _input["@name"] == "estimation_procedure": + task[_input["@name"]] = proc_dict[int(_input["#text"])]["name"] + else: + value = _input.get("#text") + task[_input["@name"]] = value + + # The number of qualities can range from 0 to infinity + for quality in task_.get("oml:quality", []): + if "#text" not in quality: + quality_value = 0.0 + else: + quality["#text"] = float(quality["#text"]) + if abs(int(quality["#text"]) - quality["#text"]) < 0.0000001: + quality["#text"] = int(quality["#text"]) + quality_value = quality["#text"] + task[quality["@name"]] = quality_value + tasks[tid] = task + except KeyError as e: + if tid is not None: + warnings.warn( + "Invalid xml for task %d: %s\nFrom %s" % (tid, e, task_), + RuntimeWarning, + stacklevel=2, + ) + else: + warnings.warn( + f"Could not find key {e} in {task_}!", RuntimeWarning, stacklevel=2 + ) + + return pd.DataFrame.from_dict(tasks, orient="index") + + def _get_estimation_procedure_list(self) -> list[dict[str, Any]]: + """Return a list of all estimation procedures which are on OpenML. + + Returns + ------- + procedures : list + A list of all estimation procedures. Every procedure is represented by + a dictionary containing the following information: id, task type id, + name, type, repeats, folds, stratified. 
+ """ + url_suffix = "estimationprocedure/list" + xml_string = self._http.get(url_suffix) + + procs_dict = xmltodict.parse(xml_string) + # Minimalistic check if the XML is useful + if "oml:estimationprocedures" not in procs_dict: + raise ValueError("Error in return XML, does not contain tag oml:estimationprocedures.") + + if "@xmlns:oml" not in procs_dict["oml:estimationprocedures"]: + raise ValueError( + "Error in return XML, does not contain tag " + "@xmlns:oml as a child of oml:estimationprocedures.", + ) + + if procs_dict["oml:estimationprocedures"]["@xmlns:oml"] != "http://openml.org/openml": + raise ValueError( + "Error in return XML, value of " + "oml:estimationprocedures/@xmlns:oml is not " + "http://openml.org/openml, but {}".format( + str(procs_dict["oml:estimationprocedures"]["@xmlns:oml"]) + ), + ) + + procs: list[dict[str, Any]] = [] + for proc_ in procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]: + task_type_int = int(proc_["oml:ttid"]) + try: + task_type_id = TaskType(task_type_int) + procs.append( + { + "id": int(proc_["oml:id"]), + "task_type_id": task_type_id, + "name": proc_["oml:name"], + "type": proc_["oml:type"], + }, + ) + except ValueError as e: + warnings.warn( + f"Could not create task type id for {task_type_int} due to error {e}", + RuntimeWarning, + stacklevel=2, + ) + + return procs + + def get_tasks( + self, + task_ids: list[int], + download_data: bool | None = None, + download_qualities: bool | None = None, + ) -> list[OpenMLTask]: + """Download tasks. + + This function iterates :meth:`openml.tasks.get_task`. + + Parameters + ---------- + task_ids : List[int] + A list of task ids to download. + download_data : bool (default = True) + Option to trigger download of data along with the meta data. + download_qualities : bool (default=True) + Option to download 'qualities' meta-data in addition to the minimal dataset description. + + Returns + ------- + list + """ + if download_data is None: + warnings.warn( + "`download_data` will default to False starting in 0.16. " + "Please set `download_data` explicitly to suppress this warning.", + stacklevel=1, + ) + download_data = True + + if download_qualities is None: + warnings.warn( + "`download_qualities` will default to False starting in 0.16. " + "Please set `download_qualities` explicitly to suppress this warning.", + stacklevel=1, + ) + download_qualities = True + + tasks = [] + for task_id in task_ids: + tasks.append( + self.get_task( + task_id, download_data=download_data, download_qualities=download_qualities + ) + ) + return tasks + + def create_task( + self, + task_type: TaskType, + dataset_id: int, + estimation_procedure_id: int, + target_name: str | None = None, + evaluation_measure: str | None = None, + **kwargs: Any, + ) -> ( + OpenMLClassificationTask + | OpenMLRegressionTask + | OpenMLLearningCurveTask + | OpenMLClusteringTask + ): + """Create a task based on different given attributes. + + Builds a task object with the function arguments as + attributes. The type of the task object built is + determined from the task type id. + More information on how the arguments (task attributes), + relate to the different possible tasks can be found in + the individual task objects at the openml.tasks.task + module. + + Parameters + ---------- + task_type : TaskType + Id of the task type. + dataset_id : int + The id of the dataset for the task. + target_name : str, optional + The name of the feature used as a target. + At the moment, only optional for the clustering tasks. 
+ estimation_procedure_id : int + The id of the estimation procedure. + evaluation_measure : str, optional + The name of the evaluation measure. + kwargs : dict, optional + Other task attributes that are not mandatory + for task upload. + + Returns + ------- + OpenMLClassificationTask, OpenMLRegressionTask, + OpenMLLearningCurveTask, OpenMLClusteringTask + """ + if task_type == TaskType.CLUSTERING: + task_cls = OpenMLClusteringTask + elif task_type == TaskType.LEARNING_CURVE: + task_cls = OpenMLLearningCurveTask # type: ignore + elif task_type == TaskType.SUPERVISED_CLASSIFICATION: + task_cls = OpenMLClassificationTask # type: ignore + elif task_type == TaskType.SUPERVISED_REGRESSION: + task_cls = OpenMLRegressionTask # type: ignore + else: + raise NotImplementedError(f"Task type {task_type:d} not supported.") + + return task_cls( + task_type_id=task_type, + task_type="None", # TODO: refactor to get task type string from ID. + data_set_id=dataset_id, + target_name=target_name, # type: ignore + estimation_procedure_id=estimation_procedure_id, + evaluation_measure=evaluation_measure, + **kwargs, + ) + + # NOTE: not in v2 + def delete_task(self, task_id: int) -> bool: + """Delete task with id `task_id` from the OpenML server. + + You can only delete tasks which you created and have + no runs associated with them. + + Parameters + ---------- + task_id : int + OpenML id of the task + + Returns + ------- + bool + True if the deletion was successful. False otherwise. + """ + return openml.utils._delete_entity("task", task_id) + class TasksV2(TasksAPI): - def get( + @openml.utils.thread_safe_if_oslo_installed + def get_task( self, task_id: int, - *, - return_response: bool = False, - ) -> OpenMLTask | tuple[OpenMLTask, Response]: - raise NotImplementedError + **get_dataset_kwargs: Any, + ) -> OpenMLTask: + if not isinstance(task_id, int): + raise TypeError(f"Task id should be integer, is {type(task_id)}") + + task = self._get_task_description(task_id) + dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) # Shrivaths work + # List of class labels available in dataset description + # Including class labels as part of task meta data handles + # the case where data download was initially disabled + if isinstance(task, (OpenMLClassificationTask, OpenMLLearningCurveTask)): + task.class_labels = dataset.retrieve_class_labels(task.target_name) + + return task + + def _get_task_description(self, task_id: int) -> OpenMLTask: + response = self._http.get(f"tasks/{task_id}") + return self._create_task_from_json(response.json()) + + def _create_task_from_json(self, task_json: dict) -> OpenMLTask: + task_type_id = TaskType(int(task_json["task_type_id"])) + + inputs = {i["name"]: i for i in task_json.get("input", [])} + + source = inputs["source_data"]["data_set"] + + common_kwargs = { + "task_id": int(task_json["id"]), + "task_type": task_json["task_type"], + "task_type_id": task_type_id, + "data_set_id": int(source["data_set_id"]), + "evaluation_measure": None, + } + + if task_type_id in ( + TaskType.SUPERVISED_CLASSIFICATION, + TaskType.SUPERVISED_REGRESSION, + TaskType.LEARNING_CURVE, + ): + est = inputs.get("estimation_procedure", {}).get("estimation_procedure") + + if est: + common_kwargs["estimation_procedure_id"] = int(est["id"]) + common_kwargs["estimation_procedure_type"] = est["type"] + common_kwargs["estimation_parameters"] = { + p["name"]: p.get("value") for p in est.get("parameter", []) + } + + common_kwargs["target_name"] = source.get("target_feature") + + cls = { + 
TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask, + TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask, + TaskType.CLUSTERING: OpenMLClusteringTask, + TaskType.LEARNING_CURVE: OpenMLLearningCurveTask, + }[task_type_id] + + return cls(**common_kwargs) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index a794ad56d..ae235f38b 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -10,9 +10,7 @@ import pandas as pd import xmltodict -import openml._api_calls import openml.utils -from openml._api import api_context from openml.datasets import get_dataset from openml.exceptions import OpenMLCacheException @@ -127,6 +125,8 @@ def _get_estimation_procedure_list() -> list[dict[str, Any]]: return procs +# v2: /tasktype/{task_type_id} +# v1: /estimationprocedure/list def list_tasks( # noqa: PLR0913 task_type: TaskType | None = None, offset: int | None = None, @@ -340,6 +340,7 @@ def __list_tasks(api_call: str) -> pd.DataFrame: # noqa: C901, PLR0912 return pd.DataFrame.from_dict(tasks, orient="index") +# /tasktype/list def get_tasks( task_ids: list[int], download_data: bool | None = None, @@ -386,6 +387,8 @@ def get_tasks( return tasks +# v1: /task/{task_id} +# v2: /tasks/{task_id} @openml.utils.thread_safe_if_oslo_installed def get_task( task_id: int, @@ -430,7 +433,7 @@ def get_task( # Clustering tasks do not have class labels # and do not offer download_split if download_splits and isinstance(task, OpenMLSupervisedTask): - task.download_split() + task.download_split() # api v1 call except Exception as e: if not tid_cache_dir_existed: openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) @@ -445,16 +448,11 @@ def _get_task_description(task_id: int) -> OpenMLTask: except OpenMLCacheException: _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) xml_file = _cache_dir / "task.xml" - result = api_context.backend.tasks.get(task_id, return_response=True) + task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get") - if isinstance(result, tuple): - task, response = result - with xml_file.open("w", encoding="utf8") as fh: - fh.write(response.text) - else: - task = result - - return task + with xml_file.open("w", encoding="utf8") as fh: + fh.write(task_xml) + return _create_task_from_xml(task_xml) def _create_task_from_xml(xml: str) -> OpenMLTask: @@ -603,6 +601,7 @@ def create_task( ) +# NOTE: not in v2 def delete_task(task_id: int) -> bool: """Delete task with id `task_id` from the OpenML server. 
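For reference, a hedged sketch of the JSON shape that `TasksV2._create_task_from_json` above appears to expect, reconstructed from its parsing logic (the field values are illustrative, not taken from an API specification):

task_json = {
    "id": "59",
    "task_type": "Supervised Classification",
    "task_type_id": "1",
    "input": [
        {
            "name": "source_data",
            "data_set": {"data_set_id": "61", "target_feature": "class"},
        },
        {
            "name": "estimation_procedure",
            "estimation_procedure": {
                "id": "1",
                "type": "crossvalidation",
                "parameter": [{"name": "number_folds", "value": "10"}],
            },
        },
    ],
}
# Feeding this dict through _create_task_from_json would yield an
# OpenMLClassificationTask with task_id=59 and data_set_id=61.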
From c2b9e1a79c2cfeb61d680db9561ed2d5bb1c17d5 Mon Sep 17 00:00:00 2001
From: Satvik Mishra <112589278+satvshr@users.noreply.github.com>
Date: Mon, 12 Jan 2026 01:35:06 +0530
Subject: [PATCH 7/9] committing latest changes

---
 openml/_api/resources/base.py | 147 ++++++++++++++++++++++++-
 openml/_api/resources/tasks.py | 51 ++++++++-
 openml/tasks/functions.py | 1 -
 openml/tasks/task.py | 1 +
 x.py | 189 +++++++++++++++++++++++++++++++++
 5 files changed, 380 insertions(+), 9 deletions(-)
 create mode 100644 x.py

diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py
index 6fbf8977d..d5742dadd 100644
--- a/openml/_api/resources/base.py
+++ b/openml/_api/resources/base.py
@@ -1,9 +1,10 @@
 from __future__ import annotations
 
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 if TYPE_CHECKING:
+    from openml.tasks.task import TaskType
     from requests import Response
 
     from openml._api.http import HTTPClient
@@ -22,10 +23,148 @@ def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]
 
 
 class TasksAPI(ResourceAPI, ABC):
+    # Single task retrieval (V1 and V2)
     @abstractmethod
     def get(
         self,
         task_id: int,
-        *,
-        return_response: bool = False,
-    ) -> OpenMLTask | tuple[OpenMLTask, Response]: ...
+        download_splits: bool = False,  # noqa: FBT001, FBT002
+        **get_dataset_kwargs: Any,
+    ) -> OpenMLTask:
+        """
+        API v1:
+            GET /task/{task_id}
+
+        API v2:
+            GET /tasks/{task_id}
+        """
+        ...
+
+    # # Multiple task retrieval (V1 only)
+    # @abstractmethod
+    # def get_tasks(
+    #     self,
+    #     task_ids: list[int],
+    #     **kwargs: Any,
+    # ) -> list[OpenMLTask]:
+    #     """
+    #     Retrieve multiple tasks.
+    #     API v1:
+    #         Implemented via repeated GET /task/{task_id}
+    #     API v2:
+    #         Not currently supported
+    #     Parameters
+    #     ----------
+    #     task_ids : list[int]
+    #     Returns
+    #     -------
+    #     list[OpenMLTask]
+    #     """
+    #     ...
+
+    # # Task listing (V1 only)
+    # @abstractmethod
+    # def list_tasks(
+    #     self,
+    #     *,
+    #     task_type: TaskType | None = None,
+    #     offset: int | None = None,
+    #     size: int | None = None,
+    #     **filters: Any,
+    # ):
+    #     """
+    #     List tasks with filters.
+    #     API v1:
+    #         GET /task/list
+    #     API v2:
+    #         Not available.
+    #     Returns
+    #     -------
+    #     pandas.DataFrame
+    #     """
+    #     ...
+
+    # # Task creation (V1 only)
+    # @abstractmethod
+    # def create_task(
+    #     self,
+    #     task_type: TaskType,
+    #     dataset_id: int,
+    #     estimation_procedure_id: int,
+    #     **kwargs: Any,
+    # ) -> OpenMLTask:
+    #     """
+    #     Create a new task.
+    #     API v1:
+    #         POST /task
+    #     API v2:
+    #         Not supported.
+    #     Returns
+    #     -------
+    #     OpenMLTask
+    #     """
+    #     ...
+
+    # # Task deletion (V1 only)
+    # @abstractmethod
+    # def delete_task(self, task_id: int) -> bool:
+    #     """
+    #     Delete a task.
+    #     API v1:
+    #         DELETE /task/{task_id}
+    #     API v2:
+    #         Not supported.
+    #     Returns
+    #     -------
+    #     bool
+    #     """
+    #     ...
+
+    # # Task type listing (V2 only)
+    # @abstractmethod
+    # def list_task_types(self) -> list[dict[str, Any]]:
+    #     """
+    #     List all task types.
+    #     API v2:
+    #         GET /tasktype/list
+    #     API v1:
+    #         Not available.
+    #     Returns
+    #     -------
+    #     list[dict]
+    #     """
+    #     ...
+
+    # # Task type retrieval (V2 only)
+    # @abstractmethod
+    # def get_task_type(self, task_type_id: int) -> dict[str, Any]:
+    #     """
+    #     Retrieve a single task type.
+    #     API v2:
+    #         GET /tasktype/{task_type_id}
+    #     API v1:
+    #         Not available.
+    #     Returns
+    #     -------
+    #     dict
+    #     """
+    #     ...
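The commented-out methods above mark operations available in only one API version; the `FallbackProxy` stub from an earlier patch is presumably meant to bridge such gaps. A possible dispatch sketch, under that assumption and not part of these patches:

class FallbackProxy:
    def __init__(self, primary, fallback):
        self._primary = primary
        self._fallback = fallback

    def __getattr__(self, name):
        def dispatch(*args, **kwargs):
            # Try the primary (v2) resource first; fall back to v1 when the
            # endpoint is missing or not yet implemented there.
            try:
                return getattr(self._primary, name)(*args, **kwargs)
            except (AttributeError, NotImplementedError):
                return getattr(self._fallback, name)(*args, **kwargs)

        return dispatch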
diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index 2305ef0cd..8ca6926a1 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -26,7 +26,7 @@ class TasksV1(TasksAPI): @openml.utils.thread_safe_if_oslo_installed - def get_task( + def get( self, task_id: int, download_splits: bool = False, # noqa: FBT001, FBT002 @@ -477,7 +477,7 @@ def get_tasks( ) -> list[OpenMLTask]: """Download tasks. - This function iterates :meth:`openml.tasks.get_task`. + This function iterates :meth:`openml.tasks.get`. Parameters ---------- @@ -511,7 +511,7 @@ def get_tasks( tasks = [] for task_id in task_ids: tasks.append( - self.get_task( + self.get( task_id, download_data=download_data, download_qualities=download_qualities ) ) @@ -606,14 +606,20 @@ def delete_task(self, task_id: int) -> bool: class TasksV2(TasksAPI): @openml.utils.thread_safe_if_oslo_installed - def get_task( + def get( self, task_id: int, + download_splits: bool = False, # noqa: FBT001, FBT002 **get_dataset_kwargs: Any, ) -> OpenMLTask: if not isinstance(task_id, int): raise TypeError(f"Task id should be integer, is {type(task_id)}") + if download_splits: + warnings.warn( + "`download_splits` is not yet supported in the v2 API and will be ignored.", + stacklevel=2, + ) task = self._get_task_description(task_id) dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) # Shrivaths work # List of class labels available in dataset description @@ -667,3 +673,40 @@ def _create_task_from_json(self, task_json: dict) -> OpenMLTask: }[task_type_id] return cls(**common_kwargs) + + def list_task_types(self) -> list[dict[str, str | int | None]]: + response = self._http.get("tasktype") + payload = response.json() + + return [ + { + "id": int(tt["id"]), + "name": tt["name"], + "description": tt["description"] or None, + "creator": tt.get("creator"), + } + for tt in payload["task_types"]["task_type"] + ] + + def get_task_type(self, task_type_id: int) -> dict[str, Any]: + if not isinstance(task_type_id, int): + raise TypeError("task_type_id must be int") + + response = self._http.get(f"tasktype/{task_type_id}") + tt = response.json()["task_type"] + + return { + "id": int(tt["id"]), + "name": tt["name"], + "description": tt.get("description"), + "creator": tt.get("creator", []), + "creation_date": tt.get("creation_date"), + "inputs": [ + { + "name": i["name"], + "required": i.get("requirement") == "required", + "data_type": i.get("data_type"), + } + for i in tt.get("input", []) + ], + } diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index ae235f38b..08399bfc4 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -340,7 +340,6 @@ def __list_tasks(api_call: str) -> pd.DataFrame: # noqa: C901, PLR0912 return pd.DataFrame.from_dict(tasks, orient="index") -# /tasktype/list def get_tasks( task_ids: list[int], download_data: bool | None = None, diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 395b52482..76c4e7769 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -8,6 +8,7 @@ from enum import Enum from pathlib import Path from typing import TYPE_CHECKING, Any, Sequence +from attr import dataclass from typing_extensions import TypedDict import openml._api_calls diff --git a/x.py b/x.py new file mode 100644 index 000000000..86b179482 --- /dev/null +++ b/x.py @@ -0,0 +1,189 @@ +# import pytest +# import openml +# from openml.tasks.task import OpenMLTask, TaskType +# from openml._api.resources.tasks import TasksV1, TasksV2 + + +# # ---------- 
shared helpers ---------- + +# TEST_TASK_ID = 1 # stable, public task +# TEST_CLASSIF_TASK_ID = 1 # supervised classification +# TEST_TASK_TYPE_ID = 1 # supervised classification + + +# def assert_basic_task(task: OpenMLTask): +# assert isinstance(task, OpenMLTask) +# assert isinstance(task.task_id, int) +# assert task.task_id > 0 +# assert task.dataset_id is not None +# assert task.task_type_id in TaskType + + +# # ---------- V1 tests ---------- + +# def test_v1_get_task(): +# api = TasksV1(openml.config.get_api_context()) + +# task = api.get(TEST_TASK_ID) +# assert_basic_task(task) + + +# def test_v1_get_task_with_splits(): +# api = TasksV1(openml.config.get_api_context()) + +# task = api.get(TEST_CLASSIF_TASK_ID, download_splits=True) +# assert_basic_task(task) + +# # only supervised tasks have splits +# if hasattr(task, "data_splits"): +# assert task.data_splits is not None + + +# def test_v1_list_tasks(): +# api = TasksV1(openml.config.get_api_context()) + +# df = api.list_tasks(size=5) +# assert not df.empty +# assert "tid" in df.columns + + +# def test_v1_list_tasks_filtered_by_type(): +# api = TasksV1(openml.config.get_api_context()) + +# df = api.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION, size=5) +# assert not df.empty +# assert all(df["ttid"] == TaskType.SUPERVISED_CLASSIFICATION) + + +# def test_v1_get_multiple_tasks(): +# api = TasksV1(openml.config.get_api_context()) + +# tasks = api.get_tasks([1, 2]) +# assert len(tasks) == 2 +# for t in tasks: +# assert_basic_task(t) + + +# # ---------- V2 tests ---------- + +# def test_v2_get_task(): +# api = TasksV2(openml.config.get_api_context()) + +# task = api.get(TEST_TASK_ID) +# assert_basic_task(task) + + +# def test_v2_get_task_warns_on_splits(): +# api = TasksV2(openml.config.get_api_context()) + +# with pytest.warns(UserWarning): +# task = api.get(TEST_TASK_ID, download_splits=True) +# assert_basic_task(task) + + +# def test_v2_list_task_types(): +# api = TasksV2(openml.config.get_api_context()) + +# task_types = api.list_task_types() +# assert isinstance(task_types, list) +# assert len(task_types) > 0 + +# first = task_types[0] +# assert "id" in first +# assert "name" in first + + +# def test_v2_get_task_type(): +# api = TasksV2(openml.config.get_api_context()) + +# tt = api.get_task_type(TEST_TASK_TYPE_ID) +# assert tt["id"] == TEST_TASK_TYPE_ID +# assert "name" in tt +# assert "inputs" in tt +# assert isinstance(tt["inputs"], list) + + +# # ---------- cross-version consistency ---------- + +# def test_v1_v2_same_task_id_consistency(): +# ctx = openml.config.get_api_context() +# v1 = TasksV1(ctx) +# v2 = TasksV2(ctx) + +# t1 = v1.get(TEST_TASK_ID) +# t2 = v2.get(TEST_TASK_ID) + +# assert t1.task_id == t2.task_id +# assert t1.dataset_id == t2.dataset_id +# assert t1.task_type_id == t2.task_type_id + +import openml +from pprint import pprint +from openml._api.config import settings, APIConfig +from openml._api.http.client import HTTPClient +from openml._api.resources import ( + DatasetsV1, + DatasetsV2, + TasksV1, + TasksV2, +) +from openml._api.resources.tasks import TasksV1, TasksV2 +from openml.tasks.task import TaskType + + +def main(): + v1=APIConfig( + server="https://www.openml.org/", + base_url="api/v1/xml/", + key="...", + ) + + v2=APIConfig( + server="http://127.0.0.1:8001/", + base_url="", + key="...", + ) + v1_http = HTTPClient(config=settings.api.v1) + v2_http = HTTPClient(config=settings.api.v2) + tasks_v1 = TasksV1() + tasks_v2 = TasksV2() + + TASK_ID = 2 + TASK_TYPE_ID = 1 # Supervised Classification 
+ + print("\n" + "=" * 80) + print("V1: get(task_id)") + print("=" * 80) + t1 = tasks_v1.get(TASK_ID) + pprint(t1) + print("type:", type(t1)) + + print("\n" + "=" * 80) + print("V2: get(task_id)") + print("=" * 80) + t2 = tasks_v2.get(TASK_ID) + pprint(t2) + print("type:", type(t2)) + + print("\n" + "=" * 80) + print("V1: list_tasks(task_type=SUPERVISED_CLASSIFICATION)") + print("=" * 80) + df_v1 = tasks_v1.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION, size=5) + print(df_v1) + print("shape:", df_v1.shape) + + print("\n" + "=" * 80) + print("V2: list_task_types()") + print("=" * 80) + tt_list = tasks_v2.list_task_types() + pprint(tt_list) + + print("\n" + "=" * 80) + print("V2: get_task_type(task_type_id)") + print("=" * 80) + tt = tasks_v2.get_task_type(TASK_TYPE_ID) + pprint(tt) + + +if __name__ == "__main__": + main() From 056cf3a4e7e81e06fed01a8c21f4faca96f2e283 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:05:22 +0000 Subject: [PATCH 8/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- openml/_api/resources/base.py | 3 +-- openml/tasks/task.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py index d5742dadd..cd1957902 100644 --- a/openml/_api/resources/base.py +++ b/openml/_api/resources/base.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any if TYPE_CHECKING: - from build.lib.openml.tasks.task import TaskType from requests import Response from openml._api.http import HTTPClient @@ -28,7 +27,7 @@ class TasksAPI(ResourceAPI, ABC): def get( self, task_id: int, - download_splits: bool = False, # noqa: FBT001, FBT002 + download_splits: bool = False, # noqa: FBT001, FBT002 **get_dataset_kwargs: Any, ) -> OpenMLTask: """ diff --git a/openml/tasks/task.py b/openml/tasks/task.py index 76c4e7769..395b52482 100644 --- a/openml/tasks/task.py +++ b/openml/tasks/task.py @@ -8,7 +8,6 @@ from enum import Enum from pathlib import Path from typing import TYPE_CHECKING, Any, Sequence -from attr import dataclass from typing_extensions import TypedDict import openml._api_calls From 17ab23c9ab62e1a85994121cad453c184d9e8e91 Mon Sep 17 00:00:00 2001 From: Satvik Mishra <112589278+satvshr@users.noreply.github.com> Date: Mon, 12 Jan 2026 02:50:16 +0530 Subject: [PATCH 9/9] bug fixing --- openml/_api/resources/tasks.py | 4 +- x.py | 189 --------------------------------- 2 files changed, 2 insertions(+), 191 deletions(-) delete mode 100644 x.py diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py index 8ca6926a1..31c34d313 100644 --- a/openml/_api/resources/tasks.py +++ b/openml/_api/resources/tasks.py @@ -425,7 +425,7 @@ def _get_estimation_procedure_list(self) -> list[dict[str, Any]]: name, type, repeats, folds, stratified. 
""" url_suffix = "estimationprocedure/list" - xml_string = self._http.get(url_suffix) + xml_string = self._http.get(url_suffix).text procs_dict = xmltodict.parse(xml_string) # Minimalistic check if the XML is useful @@ -675,7 +675,7 @@ def _create_task_from_json(self, task_json: dict) -> OpenMLTask: return cls(**common_kwargs) def list_task_types(self) -> list[dict[str, str | int | None]]: - response = self._http.get("tasktype") + response = self._http.get("tasktype/list") payload = response.json() return [ diff --git a/x.py b/x.py deleted file mode 100644 index 86b179482..000000000 --- a/x.py +++ /dev/null @@ -1,189 +0,0 @@ -# import pytest -# import openml -# from openml.tasks.task import OpenMLTask, TaskType -# from openml._api.resources.tasks import TasksV1, TasksV2 - - -# # ---------- shared helpers ---------- - -# TEST_TASK_ID = 1 # stable, public task -# TEST_CLASSIF_TASK_ID = 1 # supervised classification -# TEST_TASK_TYPE_ID = 1 # supervised classification - - -# def assert_basic_task(task: OpenMLTask): -# assert isinstance(task, OpenMLTask) -# assert isinstance(task.task_id, int) -# assert task.task_id > 0 -# assert task.dataset_id is not None -# assert task.task_type_id in TaskType - - -# # ---------- V1 tests ---------- - -# def test_v1_get_task(): -# api = TasksV1(openml.config.get_api_context()) - -# task = api.get(TEST_TASK_ID) -# assert_basic_task(task) - - -# def test_v1_get_task_with_splits(): -# api = TasksV1(openml.config.get_api_context()) - -# task = api.get(TEST_CLASSIF_TASK_ID, download_splits=True) -# assert_basic_task(task) - -# # only supervised tasks have splits -# if hasattr(task, "data_splits"): -# assert task.data_splits is not None - - -# def test_v1_list_tasks(): -# api = TasksV1(openml.config.get_api_context()) - -# df = api.list_tasks(size=5) -# assert not df.empty -# assert "tid" in df.columns - - -# def test_v1_list_tasks_filtered_by_type(): -# api = TasksV1(openml.config.get_api_context()) - -# df = api.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION, size=5) -# assert not df.empty -# assert all(df["ttid"] == TaskType.SUPERVISED_CLASSIFICATION) - - -# def test_v1_get_multiple_tasks(): -# api = TasksV1(openml.config.get_api_context()) - -# tasks = api.get_tasks([1, 2]) -# assert len(tasks) == 2 -# for t in tasks: -# assert_basic_task(t) - - -# # ---------- V2 tests ---------- - -# def test_v2_get_task(): -# api = TasksV2(openml.config.get_api_context()) - -# task = api.get(TEST_TASK_ID) -# assert_basic_task(task) - - -# def test_v2_get_task_warns_on_splits(): -# api = TasksV2(openml.config.get_api_context()) - -# with pytest.warns(UserWarning): -# task = api.get(TEST_TASK_ID, download_splits=True) -# assert_basic_task(task) - - -# def test_v2_list_task_types(): -# api = TasksV2(openml.config.get_api_context()) - -# task_types = api.list_task_types() -# assert isinstance(task_types, list) -# assert len(task_types) > 0 - -# first = task_types[0] -# assert "id" in first -# assert "name" in first - - -# def test_v2_get_task_type(): -# api = TasksV2(openml.config.get_api_context()) - -# tt = api.get_task_type(TEST_TASK_TYPE_ID) -# assert tt["id"] == TEST_TASK_TYPE_ID -# assert "name" in tt -# assert "inputs" in tt -# assert isinstance(tt["inputs"], list) - - -# # ---------- cross-version consistency ---------- - -# def test_v1_v2_same_task_id_consistency(): -# ctx = openml.config.get_api_context() -# v1 = TasksV1(ctx) -# v2 = TasksV2(ctx) - -# t1 = v1.get(TEST_TASK_ID) -# t2 = v2.get(TEST_TASK_ID) - -# assert t1.task_id == t2.task_id -# assert 
t1.dataset_id == t2.dataset_id -# assert t1.task_type_id == t2.task_type_id - -import openml -from pprint import pprint -from openml._api.config import settings, APIConfig -from openml._api.http.client import HTTPClient -from openml._api.resources import ( - DatasetsV1, - DatasetsV2, - TasksV1, - TasksV2, -) -from openml._api.resources.tasks import TasksV1, TasksV2 -from openml.tasks.task import TaskType - - -def main(): - v1=APIConfig( - server="https://www.openml.org/", - base_url="api/v1/xml/", - key="...", - ) - - v2=APIConfig( - server="http://127.0.0.1:8001/", - base_url="", - key="...", - ) - v1_http = HTTPClient(config=settings.api.v1) - v2_http = HTTPClient(config=settings.api.v2) - tasks_v1 = TasksV1() - tasks_v2 = TasksV2() - - TASK_ID = 2 - TASK_TYPE_ID = 1 # Supervised Classification - - print("\n" + "=" * 80) - print("V1: get(task_id)") - print("=" * 80) - t1 = tasks_v1.get(TASK_ID) - pprint(t1) - print("type:", type(t1)) - - print("\n" + "=" * 80) - print("V2: get(task_id)") - print("=" * 80) - t2 = tasks_v2.get(TASK_ID) - pprint(t2) - print("type:", type(t2)) - - print("\n" + "=" * 80) - print("V1: list_tasks(task_type=SUPERVISED_CLASSIFICATION)") - print("=" * 80) - df_v1 = tasks_v1.list_tasks(task_type=TaskType.SUPERVISED_CLASSIFICATION, size=5) - print(df_v1) - print("shape:", df_v1.shape) - - print("\n" + "=" * 80) - print("V2: list_task_types()") - print("=" * 80) - tt_list = tasks_v2.list_task_types() - pprint(tt_list) - - print("\n" + "=" * 80) - print("V2: get_task_type(task_type_id)") - print("=" * 80) - tt = tasks_v2.get_task_type(TASK_TYPE_ID) - pprint(tt) - - -if __name__ == "__main__": - main()
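
Note: both fixes in PATCH 9 trace back to the same mismatch: `HTTPClient.get` returns a `requests.Response`, so `_get_estimation_procedure_list` must hand `xmltodict.parse` the decoded body (`.text`) rather than the response object, and the v2 task-type listing lives at `tasktype/list`, not `tasktype`. A small sketch of the parsing half against the public v1 endpoint; the `oml:`-prefixed keys follow the namespace convention of the v1 XML API:

import requests
import xmltodict

url = "https://www.openml.org/api/v1/xml/estimationprocedure/list"
response = requests.get(url, timeout=30)

# xmltodict.parse(response) would fail: expat consumes str/bytes (or a
# file-like object), never a Response. The decoded body parses cleanly:
procs_dict = xmltodict.parse(response.text)
procs = procs_dict["oml:estimationprocedures"]["oml:estimationprocedure"]
print(len(procs), "estimation procedures")

This is the shape the function's docstring promises before each entry is flattened into name, type, repeats, folds, and stratified fields.
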