From 0159f474c6bbc15f20d52bc946bd252bd852b196 Mon Sep 17 00:00:00 2001
From: geetu040
Date: Tue, 30 Dec 2025 09:11:27 +0500
Subject: [PATCH] set up folder structure and base code

---
 openml/_api/__init__.py           |  15 ++++
 openml/_api/config.py             |   5 ++
 openml/_api/http/__init__.py      |   1 +
 openml/_api/http/client.py        |  35 +++++++++
 openml/_api/http/utils.py         |   0
 openml/_api/resources/__init__.py |   2 +
 openml/_api/resources/base.py     |  22 ++++++
 openml/_api/resources/datasets.py |  15 ++++
 openml/_api/resources/tasks.py    | 126 ++++++++++++++++++++++++++++++
 openml/_api/runtime/core.py       |  86 ++++++++++++++++++++
 openml/_api/runtime/fallback.py   |  34 ++++++++
 openml/tasks/functions.py         |   8 +-
 12 files changed, 346 insertions(+), 3 deletions(-)
 create mode 100644 openml/_api/__init__.py
 create mode 100644 openml/_api/config.py
 create mode 100644 openml/_api/http/__init__.py
 create mode 100644 openml/_api/http/client.py
 create mode 100644 openml/_api/http/utils.py
 create mode 100644 openml/_api/resources/__init__.py
 create mode 100644 openml/_api/resources/base.py
 create mode 100644 openml/_api/resources/datasets.py
 create mode 100644 openml/_api/resources/tasks.py
 create mode 100644 openml/_api/runtime/core.py
 create mode 100644 openml/_api/runtime/fallback.py

diff --git a/openml/_api/__init__.py b/openml/_api/__init__.py
new file mode 100644
index 000000000..5089f94dd
--- /dev/null
+++ b/openml/_api/__init__.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from openml._api.runtime.core import APIContext
+
+
+def set_api_version(version: str, strict: bool = False) -> None:
+    """Rebuild the shared backend for the given API version.
+
+    Both arguments are forwarded unchanged to ``APIContext.set_version``.
+    """
+    api_context.set_version(version=version, strict=strict)
+
+
+# Module-level context; ``openml.tasks.functions`` imports this instance.
+api_context = APIContext()
diff --git a/openml/_api/config.py b/openml/_api/config.py
new file mode 100644
index 000000000..bd93c3cad
--- /dev/null
+++ b/openml/_api/config.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+API_V1_SERVER = "https://www.openml.org/api/v1/xml"
+API_V2_SERVER = "http://127.0.0.1:8001"
+API_KEY = "..."
diff --git a/openml/_api/http/__init__.py b/openml/_api/http/__init__.py
new file mode 100644
index 000000000..fde2a5b0a
--- /dev/null
+++ b/openml/_api/http/__init__.py
@@ -0,0 +1 @@
+from openml._api.http.client import HTTPClient
diff --git a/openml/_api/http/client.py b/openml/_api/http/client.py
new file mode 100644
index 000000000..81a9213e3
--- /dev/null
+++ b/openml/_api/http/client.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+import requests
+
+from openml.__version__ import __version__
+
+# ``requests`` waits indefinitely unless a timeout is given, so every call
+# made through ``HTTPClient`` passes one (in seconds).
+DEFAULT_TIMEOUT = 30
+
+
+class HTTPClient:
+    """Thin wrapper around ``requests`` bound to a single server base URL."""
+
+    def __init__(self, base_url: str, timeout: float | None = DEFAULT_TIMEOUT):
+        self.base_url = base_url
+        self.timeout = timeout
+        self.headers = {"user-agent": f"openml-python/{__version__}"}
+
+    def get(self, path, params=None):
+        """Send a GET request to ``base_url/path`` and return the response."""
+        url = f"{self.base_url}/{path}"
+        return requests.get(url, params=params, headers=self.headers, timeout=self.timeout)
+
+    def post(self, path, data=None, files=None):
+        """Send a POST request to ``base_url/path`` and return the response."""
+        url = f"{self.base_url}/{path}"
+        return requests.post(
+            url, data=data, files=files, headers=self.headers, timeout=self.timeout
+        )
+
+    def delete(self, path, params=None):
+        """Send a DELETE request to ``base_url/path`` and return the response."""
+        url = f"{self.base_url}/{path}"
+        return requests.delete(url, params=params, headers=self.headers, timeout=self.timeout)
diff --git a/openml/_api/http/utils.py b/openml/_api/http/utils.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/openml/_api/resources/__init__.py b/openml/_api/resources/__init__.py
new file mode 100644
index 000000000..078fc5998
--- /dev/null
+++ b/openml/_api/resources/__init__.py
@@ -0,0 +1,2 @@
+from openml._api.resources.datasets import DatasetsV1, DatasetsV2
+from openml._api.resources.tasks import TasksV1, TasksV2
diff --git a/openml/_api/resources/base.py b/openml/_api/resources/base.py
new file mode 100644
index 000000000..1fae27665
--- /dev/null
+++ b/openml/_api/resources/base.py
@@ -0,0 +1,22 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from openml._api.http import HTTPClient
+
+
+class ResourceAPI:
+    def __init__(self, http: HTTPClient):
+        self._http = http
+
+
+class DatasetsAPI(ResourceAPI, ABC):
+    @abstractmethod
+    def get(self, id: int) -> dict: ...
+
+
+class TasksAPI(ResourceAPI, ABC):
+    @abstractmethod
+    def get(self, id: int) -> dict: ...
diff --git a/openml/_api/resources/datasets.py b/openml/_api/resources/datasets.py
new file mode 100644
index 000000000..cd1bb595a
--- /dev/null
+++ b/openml/_api/resources/datasets.py
@@ -0,0 +1,15 @@
+from __future__ import annotations
+
+from openml._api.resources.base import DatasetsAPI
+
+
+class DatasetsV1(DatasetsAPI):
+    def get(self, id):
+        # Not implemented yet; raise instead of silently returning ``None``.
+        raise NotImplementedError("DatasetsV1.get is not implemented yet.")
+
+
+class DatasetsV2(DatasetsAPI):
+    def get(self, id):
+        # Not implemented yet; raise instead of silently returning ``None``.
+        raise NotImplementedError("DatasetsV2.get is not implemented yet.")
diff --git a/openml/_api/resources/tasks.py b/openml/_api/resources/tasks.py
new file mode 100644
index 000000000..b0e9afbf8
--- /dev/null
+++ b/openml/_api/resources/tasks.py
@@ -0,0 +1,126 @@
+from __future__ import annotations
+
+import xmltodict
+
+from openml._api.resources.base import TasksAPI
+from openml.tasks.task import (
+    OpenMLClassificationTask,
+    OpenMLClusteringTask,
+    OpenMLLearningCurveTask,
+    OpenMLRegressionTask,
+    OpenMLTask,
+    TaskType,
+)
+
+
+class TasksV1(TasksAPI):
+    def get(self, id, return_response=False):
+        """Fetch task ``id`` from the server and parse it into a task object.
+
+        Parameters
+        ----------
+        id : int
+            Id of the task to fetch.
+        return_response : bool
+            If True, also return the HTTP response the task was parsed from.
+
+        Returns
+        -------
+        OpenMLTask or tuple of (OpenMLTask, response)
+        """
+        path = f"task/{id}"
+        response = self._http.get(path)
+        xml_content = response.content
+        task = self._create_task_from_xml(xml_content)
+
+        if return_response:
+            return task, response
+
+        return task
+
+    def _create_task_from_xml(self, xml: str | bytes) -> OpenMLTask:
+        """Create a task given a xml string.
+
+        Parameters
+        ----------
+        xml : string or bytes
+            Task xml representation.
+
+        Returns
+        -------
+        OpenMLTask
+        """
+        dic = xmltodict.parse(xml)["oml:task"]
+        estimation_parameters = {}
+        inputs = {}
+        # Due to the unordered structure we obtain, we first have to extract
+        # the possible keys of oml:input; dic["oml:input"] is a list of
+        # OrderedDicts
+
+        # Check if there is a list of inputs
+        if isinstance(dic["oml:input"], list):
+            for input_ in dic["oml:input"]:
+                name = input_["@name"]
+                inputs[name] = input_
+        # Single input case
+        elif isinstance(dic["oml:input"], dict):
+            name = dic["oml:input"]["@name"]
+            inputs[name] = dic["oml:input"]
+
+        evaluation_measures = None
+        if "evaluation_measures" in inputs:
+            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
+                "oml:evaluation_measure"
+            ]
+
+        task_type = TaskType(int(dic["oml:task_type_id"]))
+        common_kwargs = {
+            "task_id": dic["oml:task_id"],
+            "task_type": dic["oml:task_type"],
+            "task_type_id": task_type,
+            "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
+            "evaluation_measure": evaluation_measures,
+        }
+        # TODO: add OpenMLClusteringTask?
+        if task_type in (
+            TaskType.SUPERVISED_CLASSIFICATION,
+            TaskType.SUPERVISED_REGRESSION,
+            TaskType.LEARNING_CURVE,
+        ):
+            procedure = inputs["estimation_procedure"]["oml:estimation_procedure"]
+
+            # Convert some more parameters. As with oml:input above, a single
+            # parameter arrives as one dict instead of a list of dicts.
+            parameters = procedure["oml:parameter"]
+            if isinstance(parameters, dict):
+                parameters = [parameters]
+            for parameter in parameters:
+                name = parameter["@name"]
+                text = parameter.get("#text", "")
+                estimation_parameters[name] = text
+
+            common_kwargs["estimation_procedure_type"] = procedure["oml:type"]
+            common_kwargs["estimation_procedure_id"] = int(procedure["oml:id"])
+
+            common_kwargs["estimation_parameters"] = estimation_parameters
+            common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][
+                "oml:target_feature"
+            ]
+            common_kwargs["data_splits_url"] = procedure["oml:data_splits_url"]
+
+        cls = {
+            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
+            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
+            TaskType.CLUSTERING: OpenMLClusteringTask,
+            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
+        }.get(task_type)
+        if cls is None:
+            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
+        return cls(**common_kwargs)  # type: ignore
+
+
+class TasksV2(TasksAPI):
+    def get(self, id, return_response=False):
+        # Same signature as TasksV1.get so both are interchangeable; not
+        # implemented yet, so raise instead of silently returning ``None``.
+        raise NotImplementedError("TasksV2.get is not implemented yet.")
diff --git a/openml/_api/runtime/core.py b/openml/_api/runtime/core.py
new file mode 100644
index 000000000..80f35587c
--- /dev/null
+++ b/openml/_api/runtime/core.py
@@ -0,0 +1,86 @@
+from __future__ import annotations
+
+from openml._api.config import (
+    API_V1_SERVER,
+    API_V2_SERVER,
+)
+from openml._api.http.client import HTTPClient
+from openml._api.resources import (
+    DatasetsV1,
+    DatasetsV2,
+    TasksV1,
+    TasksV2,
+)
+from openml._api.runtime.fallback import FallbackProxy
+
+SUPPORTED_VERSIONS = ("v1", "v2")
+
+
+class APIBackend:
+    """Bundle of the resource APIs that make up one backend."""
+
+    def __init__(self, *, datasets, tasks):
+        self.datasets = datasets
+        self.tasks = tasks
+
+
+def build_backend(version: str, strict: bool) -> APIBackend:
+    """Assemble the backend for ``version``.
+
+    Parameters
+    ----------
+    version : str
+        ``"v1"`` or ``"v2"``.
+    strict : bool
+        Only relevant for ``"v2"``: if True the plain v2 resources are
+        returned, otherwise each v2 resource is wrapped in a
+        ``FallbackProxy`` with the v1 resource as fallback.
+
+    Raises
+    ------
+    ValueError
+        If ``version`` is not a supported version.
+    """
+    # Reject unknown versions instead of silently treating them as v2.
+    if version not in SUPPORTED_VERSIONS:
+        raise ValueError(
+            f"Unknown API version {version!r}; expected one of {SUPPORTED_VERSIONS}."
+        )
+
+    v1_http = HTTPClient(API_V1_SERVER)
+    v1 = APIBackend(
+        datasets=DatasetsV1(v1_http),
+        tasks=TasksV1(v1_http),
+    )
+
+    if version == "v1":
+        return v1
+
+    v2_http = HTTPClient(API_V2_SERVER)
+    v2 = APIBackend(
+        datasets=DatasetsV2(v2_http),
+        tasks=TasksV2(v2_http),
+    )
+
+    if strict:
+        return v2
+
+    return APIBackend(
+        datasets=FallbackProxy(v2.datasets, v1.datasets),
+        tasks=FallbackProxy(v2.tasks, v1.tasks),
+    )
+
+
+class APIContext:
+    """Holds the currently selected backend; starts out on v1."""
+
+    def __init__(self):
+        self._backend = build_backend("v1", strict=False)
+
+    def set_version(self, version: str, strict: bool = False):
+        """Replace the current backend with one built for ``version``."""
+        self._backend = build_backend(version, strict)
+
+    @property
+    def backend(self):
+        return self._backend
diff --git a/openml/_api/runtime/fallback.py b/openml/_api/runtime/fallback.py
new file mode 100644
index 000000000..56e96a966
--- /dev/null
+++ b/openml/_api/runtime/fallback.py
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+class FallbackProxy:
+    """Forward attribute access to ``primary``, retrying on ``fallback``.
+
+    A method call is first sent to ``primary``; if that raises
+    ``NotImplementedError`` the same call is repeated on ``fallback``.
+    Non-callable attributes are read from ``primary`` only.
+    """
+
+    def __init__(self, primary: Any, fallback: Any):
+        self._primary = primary
+        self._fallback = fallback
+
+    def __getattr__(self, name: str) -> Any:
+        # Only reached for names not found on the proxy itself. Guard the two
+        # private slots so a half-initialised instance cannot recurse forever.
+        if name in ("_primary", "_fallback"):
+            raise AttributeError(name)
+
+        target = getattr(self._primary, name)
+        if not callable(target):
+            return target
+
+        def call(*args: Any, **kwargs: Any) -> Any:
+            try:
+                return target(*args, **kwargs)
+            except NotImplementedError:
+                return getattr(self._fallback, name)(*args, **kwargs)
+
+        return call
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index d2bf5e946..91be65965 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -12,6 +12,7 @@
 
 import openml._api_calls
 import openml.utils
+from openml._api import api_context
 from openml.datasets import get_dataset
 from openml.exceptions import OpenMLCacheException
 
@@ -442,11 +443,12 @@ def _get_task_description(task_id: int) -> OpenMLTask:
     except OpenMLCacheException:
         _cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
         xml_file = _cache_dir / "task.xml"
-        task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get")
+        task, response = api_context.backend.tasks.get(task_id, return_response=True)
 
         with xml_file.open("w", encoding="utf8") as fh:
-            fh.write(task_xml)
-        return _create_task_from_xml(task_xml)
+            fh.write(response.text)
+
+        return task
 
 
 def _create_task_from_xml(xml: str) -> OpenMLTask: