From fd062f7840be5d0fd3a3d3e26cd4fdc14aa82dba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 16:55:05 +0100 Subject: [PATCH 01/18] Update test.yml --- .github/workflows/test.yml | 93 ++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 49 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 31cdff602..b3108ff59 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,7 +14,6 @@ on: branches: - main - develop - concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true @@ -23,70 +22,48 @@ jobs: test: name: (${{ matrix.os }}, Py${{ matrix.python-version }}, sk${{ matrix.scikit-learn }}, sk-only:${{ matrix.sklearn-only }}) runs-on: ${{ matrix.os }} + strategy: + fail-fast: false matrix: - python-version: ["3.9"] - scikit-learn: ["1.0.*", "1.1.*", "1.2.*", "1.3.*", "1.4.*", "1.5.*"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] + scikit-learn: ["1.2.*", "1.3.*", "1.4.*", "1.5.*"] os: [ubuntu-latest] sklearn-only: ["true"] + include: - - os: ubuntu-latest - python-version: "3.8" # no scikit-learn 0.23 release for Python 3.9 - scikit-learn: "0.23.1" - sklearn-only: "true" - # scikit-learn 0.24 relies on scipy defaults, so we need to fix the version - # c.f. https://github.com/openml/openml-python/pull/1267 - - os: ubuntu-latest - python-version: "3.9" - scikit-learn: "0.24" - scipy: "1.10.0" - sklearn-only: "true" - # Do a Windows and Ubuntu test for _all_ openml functionality - # I am not sure why these are on 3.8 and older scikit-learn + # Full test run on Windows - os: windows-latest - python-version: "3.8" - scikit-learn: 0.24.* - scipy: "1.10.0" - sklearn-only: 'false' - # Include a code cov version + python-version: "3.12" + scikit-learn: "1.5.*" + sklearn-only: "false" + + # Coverage run - os: ubuntu-latest + python-version: "3.12" + scikit-learn: "1.5.*" + sklearn-only: "false" code-cov: true - python-version: "3.8" - scikit-learn: 0.23.1 - sklearn-only: 'false' - fail-fast: false steps: - uses: actions/checkout@v4 with: fetch-depth: 2 + - name: Setup Python ${{ matrix.python-version }} - if: matrix.os != 'windows-latest' # windows-latest only uses preinstalled Python (3.9.13) uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + - name: Install test dependencies run: | python -m pip install --upgrade pip pip install -e .[test] + - name: Install scikit-learn ${{ matrix.scikit-learn }} run: | pip install scikit-learn==${{ matrix.scikit-learn }} - - name: Install numpy for Python 3.8 - # Python 3.8 & scikit-learn<0.24 requires numpy<=1.23.5 - if: ${{ matrix.python-version == '3.8' && matrix.scikit-learn == '0.23.1' }} - run: | - pip install numpy==1.23.5 - - name: "Install NumPy 1.x and SciPy <1.11 for scikit-learn < 1.4" - if: ${{ contains(fromJSON('["1.0.*", "1.1.*", "1.2.*", "1.3.*"]'), matrix.scikit-learn) }} - run: | - # scipy has a change to the 'mode' behavior which breaks scikit-learn < 1.4 - # numpy 2.0 has several breaking changes - pip install "numpy<2.0" "scipy<1.11" - - name: Install scipy ${{ matrix.scipy }} - if: ${{ matrix.scipy }} - run: | - pip install scipy==${{ matrix.scipy }} + - name: Store repository status id: status-before if: matrix.os != 'windows-latest' @@ -94,28 +71,45 @@ jobs: git_status=$(git status --porcelain -b) echo "BEFORE=$git_status" >> $GITHUB_ENV echo "Repository status before tests: $git_status" + - name: Show installed 
dependencies run: python -m pip list + - name: Run tests on Ubuntu Test if: matrix.os == 'ubuntu-latest' run: | - if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi - # Most of the time, running only the scikit-learn tests is sufficient - if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and not production'; else marks='not production'; fi - echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + if [ "${{ matrix.code-cov }}" = "true" ]; then + codecov="--cov=openml --long --cov-report=xml" + fi + + if [ "${{ matrix.sklearn-only }}" = "true" ]; then + marks="sklearn and not production" + else + marks="not production" + fi + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' run: | - if [ ${{ matrix.code-cov }} ]; then codecov='--cov=openml --long --cov-report=xml'; fi - # Most of the time, running only the scikit-learn tests is sufficient - if [ ${{ matrix.sklearn-only }} = 'true' ]; then marks='sklearn and production'; else marks='production'; fi - echo pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + if [ "${{ matrix.code-cov }}" = "true" ]; then + codecov="--cov=openml --long --cov-report=xml" + fi + + if [ "${{ matrix.sklearn-only }}" = "true" ]; then + marks="sklearn and production" + else + marks="production" + fi + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + - name: Run tests on Windows if: matrix.os == 'windows-latest' run: | # we need a separate step because of the bash-specific if-statement in the previous one. pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 + - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() run: | @@ -127,6 +121,7 @@ jobs: echo "Not all generated files have been deleted!" 
exit 1 fi + - name: Upload coverage if: matrix.code-cov && always() uses: codecov/codecov-action@v4 From ac4c670375dbf6e4e79c6142ee5853a3e53e327e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 16:55:26 +0100 Subject: [PATCH 02/18] identifiers --- pyproject.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 2bf762b09..ede204ca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,12 +50,11 @@ classifiers = [ "Operating System :: Unix", "Operating System :: MacOS", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] license = { file = "LICENSE" } From 04c473afbda501b308f0cacae13a53a793c2b063 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 17:07:48 +0100 Subject: [PATCH 03/18] Update test.yml --- .github/workflows/test.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index b3108ff59..0df538fa9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -55,14 +55,10 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Install test dependencies + - name: Install test dependencies and scikit-learn run: | python -m pip install --upgrade pip - pip install -e .[test] - - - name: Install scikit-learn ${{ matrix.scikit-learn }} - run: | - pip install scikit-learn==${{ matrix.scikit-learn }} + pip install -e .[test] scikit-learn==${{ matrix.scikit-learn }} - name: Store repository status id: status-before From af041915b27f8023b8956630ecdc05d3572d5626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 17:15:31 +0100 Subject: [PATCH 04/18] Update test.yml --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0df538fa9..9d41436d0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,8 +26,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] - scikit-learn: ["1.2.*", "1.3.*", "1.4.*", "1.5.*"] + python-version: ["3.10", "3.11", "3.12", "3.13"] + scikit-learn: ["1.3.*", "1.4.*", "1.5.*", "1.6.*", "1.7.*"] os: [ubuntu-latest] sklearn-only: ["true"] From 16309a68fa27712d0cb7d0b86a1720a3e46b93fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 21:59:10 +0100 Subject: [PATCH 05/18] Update test.yml --- .github/workflows/test.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 54fbc7a72..32d3602e1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,6 +31,11 @@ jobs: os: [ubuntu-latest] sklearn-only: ["true"] + exclude: + # incompatible version combinations + - python-version: "3.13" + scikit-learn: ["1.3.*", "1.4.*"] + include: # Full test run on Windows - os: windows-latest From 62c0651e1409bfda07fd2c3668fcf10af88cd49f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 22:18:29 +0100 Subject: [PATCH 06/18] Update test.yml --- .github/workflows/test.yml | 4 +++- 1 file changed, 3 
insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 32d3602e1..5ffe08fae 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -34,7 +34,9 @@ jobs: exclude: # incompatible version combinations - python-version: "3.13" - scikit-learn: ["1.3.*", "1.4.*"] + scikit-learn: "1.3.*" + - python-version: "3.13" + scikit-learn: "1.4.*" include: # Full test run on Windows From e902465e9bfb598d5ee3b66f2d531c23004e13aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Mon, 29 Dec 2025 23:30:09 +0100 Subject: [PATCH 07/18] Update test.yml --- .github/workflows/test.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5ffe08fae..3d7998f24 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,14 +43,14 @@ jobs: - os: windows-latest python-version: "3.12" scikit-learn: "1.5.*" - sklearn-only: "false" + sklearn-only: "true" # Coverage run - os: ubuntu-latest python-version: "3.12" scikit-learn: "1.5.*" - sklearn-only: "false" - code-cov: true + sklearn-only: "true" + # code-cov: true steps: - uses: actions/checkout@v4 From 00f1b29d71d27e1106416f76a1239587b95e773c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Tue, 30 Dec 2025 00:04:53 +0100 Subject: [PATCH 08/18] Update test.yml --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3d7998f24..7940f4a7e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,7 +40,8 @@ jobs: include: # Full test run on Windows - - os: windows-latest + - os: ubuntu-latest + # - os: windows-latest python-version: "3.12" scikit-learn: "1.5.*" sklearn-only: "true" From 406205a714ebbd2178eef88e5d022989b4624e70 Mon Sep 17 00:00:00 2001 From: JATAYU000 Date: Tue, 30 Dec 2025 14:43:14 +0530 Subject: [PATCH 09/18] mark xfail --- tests/test_datasets/test_dataset_functions.py | 16 ++++++++++++++++ tests/test_runs/test_run_functions.py | 4 ++++ tests/test_setups/test_setup_functions.py | 1 + tests/test_study/test_study_functions.py | 2 ++ tests/test_tasks/test_classification_task.py | 3 +++ tests/test_tasks/test_learning_curve_task.py | 4 ++++ tests/test_tasks/test_regression_task.py | 2 ++ tests/test_tasks/test_task_functions.py | 5 +++++ 8 files changed, 37 insertions(+) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 266a6f6f7..f63bd1534 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -244,6 +244,7 @@ def test_get_datasets(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_by_name(self): dataset = openml.datasets.get_dataset("anneal") assert type(dataset) == OpenMLDataset @@ -262,6 +263,7 @@ def test_get_dataset_download_all_files(self): # test_get_dataset_lazy raise NotImplementedError + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) assert type(dataset) == OpenMLDataset @@ -280,6 +282,7 @@ def test_dataset_by_name_cannot_access_private_data(self): self.use_production_server() self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") + @pytest.mark.xfail(reason="failures_issue_1544") def 
test_get_dataset_lazy_all_functions(self): """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1) @@ -309,6 +312,7 @@ def ensure_absence_of_real_data(): assert classes == ["1", "2", "3", "4", "5", "U"] ensure_absence_of_real_data() + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_sparse(self): dataset = openml.datasets.get_dataset(102) X, *_ = dataset.get_data() @@ -327,6 +331,7 @@ def test__get_dataset_description(self): description_xml_path = os.path.join(self.workdir, "description.xml") assert os.path.exists(description_xml_path) + @pytest.mark.xfail(reason="failures_issue_1544") def test__getarff_path_dataset_arff(self): openml.config.set_root_cache_directory(self.static_cache_dir) description = _get_dataset_description(self.workdir, 2) @@ -430,12 +435,14 @@ def test__getarff_md5_issue(self): openml.config.connection_n_retries = n + @pytest.mark.xfail(reason="failures_issue_1544") def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) assert isinstance(features_file, Path) features_xml_path = self.workdir / "features.xml" assert features_xml_path.exists() + @pytest.mark.xfail(reason="failures_issue_1544") def test__get_dataset_qualities(self): qualities = _get_dataset_qualities_file(self.workdir, 2) assert isinstance(qualities, Path) @@ -853,6 +860,7 @@ def test_create_invalid_dataset(self): param["data"] = data[0] self.assertRaises(ValueError, create_dataset, **param) + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_online_dataset_arff(self): dataset_id = 100 # Australian # lazy loading not used as arff file is checked. @@ -1332,6 +1340,7 @@ def test_list_qualities(self): assert isinstance(qualities, list) is True assert all(isinstance(q, str) for q in qualities) is True + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) dataset.get_data() @@ -1347,6 +1356,7 @@ def test_get_dataset_cache_format_pickle(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_cache_format_feather(self): # This test crashed due to using the parquet file by default, which is downloaded # from minio. However, there is a mismatch between OpenML test server and minio IDs. 
@@ -1523,6 +1533,7 @@ def test_list_datasets_with_high_size_parameter(self): (None, None, ["wrong", "sunny"]), ], ) +@pytest.mark.xfail(reason="failures_issue_1544") def test_invalid_attribute_validations( default_target_attribute, row_id_attribute, @@ -1584,6 +1595,7 @@ def test_invalid_attribute_validations( (None, None, ["outlook", "windy"]), ], ) +@pytest.mark.xfail(reason="failures_issue_1544") def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -1802,6 +1814,7 @@ def test_list_datasets_by_number_instances(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(small_datasets) +@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_by_number_features(all_datasets: pd.DataFrame): wide_datasets = openml.datasets.list_datasets(number_features="50..100") assert 8 <= len(wide_datasets) < len(all_datasets) @@ -1814,12 +1827,14 @@ def test_list_datasets_by_number_classes(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(five_class_datasets) +@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_by_number_missing_values(all_datasets: pd.DataFrame): na_datasets = openml.datasets.list_datasets(number_missing_values="5..100") assert 5 <= len(na_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(na_datasets) +@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_combined_filters(all_datasets: pd.DataFrame): combined_filter_datasets = openml.datasets.list_datasets( tag="study_14", @@ -1892,6 +1907,7 @@ def isolate_for_test(): ("with_data", "with_qualities", "with_features"), itertools.product([True, False], repeat=3), ) +@pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_lazy_behavior( isolate_for_test, with_data: bool, with_qualities: bool, with_features: bool ): diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 3bb4b0a0c..c4b1e4cc7 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1695,6 +1695,7 @@ def test_format_prediction_non_supervised(self): ): format_prediction(clustering, *ignored_input) + @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_classification_no_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1704,6 +1705,7 @@ def test_format_prediction_classification_no_probabilities(self): with pytest.raises(ValueError, match="`proba` is required for classification task"): format_prediction(classification, *ignored_input, proba=None) + @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_classification_incomplete_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1714,6 +1716,7 @@ def test_format_prediction_classification_incomplete_probabilities(self): with pytest.raises(ValueError, match="Each class should have a predicted probability"): format_prediction(classification, *ignored_input, proba=incomplete_probabilities) + @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_task_without_classlabels_set(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1724,6 +1727,7 @@ def test_format_prediction_task_without_classlabels_set(self): with pytest.raises(ValueError, match="The classification task must have class labels set"): 
format_prediction(classification, *ignored_input, proba={}) + @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_task_learning_curve_sample_not_set(self): learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} diff --git a/tests/test_setups/test_setup_functions.py b/tests/test_setups/test_setup_functions.py index 18d7f5cc6..9ffd7b9c1 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -166,6 +166,7 @@ def test_list_setups_output_format(self): assert isinstance(setups, pd.DataFrame) assert len(setups) == 10 + @pytest.mark.xfail(reason="failures_issue_1544") def test_setuplist_offset(self): size = 10 setups = openml.setups.list_setups(offset=0, size=size) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 40026592f..837feb5bb 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -148,6 +148,7 @@ def test_publish_empty_study_implicit(self): self._test_publish_empty_study_is_allowed(explicit=False) @pytest.mark.flaky() + @pytest.mark.xfail(reason="failures_issue_1544") def test_publish_study(self): # get some random runs to attach run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) @@ -217,6 +218,7 @@ def test_publish_study(self): res = openml.study.delete_study(study.id) assert res + @pytest.mark.xfail(reason="failures_issue_1544") def test_study_attach_illegal(self): run_list = openml.runs.list_runs(size=10) assert len(run_list) == 10 diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index d4f2ed9d7..70c3115e5 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -18,6 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 5 + @pytest.mark.xfail(reason="failures_issue_1544") def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id @@ -25,12 +26,14 @@ def test_download_task(self): assert task.dataset_id == 20 assert task.estimation_procedure_id == self.estimation_procedure + @pytest.mark.xfail(reason="failures_issue_1544") def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] @pytest.mark.server() +@pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(): task = get_task(119) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index 885f80a27..f8ed876cc 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -2,6 +2,7 @@ from __future__ import annotations import pandas as pd +import pytest from openml.tasks import TaskType, get_task @@ -17,6 +18,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (768, 8) @@ -25,12 +27,14 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_categorical_dtype(Y) + @pytest.mark.xfail(reason="failures_issue_1544") def test_download_task(self): task = super().test_download_task() assert task.task_id 
== self.task_id assert task.task_type_id == TaskType.LEARNING_CURVE assert task.dataset_id == 20 + @pytest.mark.xfail(reason="failures_issue_1544") def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index 14ed59470..5c3e79061 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -4,6 +4,7 @@ import ast import pandas as pd +import pytest import openml from openml.exceptions import OpenMLServerException @@ -48,6 +49,7 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_REGRESSION + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (194, 32) diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 5f1d577c0..0e9b5fdce 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -100,6 +100,7 @@ def test_list_tasks(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) + @pytest.mark.xfail(reason="failures_issue_1544") def test_list_tasks_paginate(self): size = 10 max = 100 @@ -139,6 +140,7 @@ def test__get_task_live(self): # https://github.com/openml/openml-python/issues/378 openml.tasks.get_task(34536) + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task(self): task = openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -152,6 +154,7 @@ def test_get_task(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") ) + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -191,6 +194,7 @@ def assert_and_raise(*args, **kwargs): # Now the file should no longer exist assert not os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")) + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task_with_cache(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1) @@ -206,6 +210,7 @@ def test_get_task_different_types(self): # Issue 538, get_task failing with clustering task. 
openml.tasks.functions.get_task(126033) + @pytest.mark.xfail(reason="failures_issue_1544") def test_download_split(self): task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() From f38efad97c3757472e3a8d0e8eb2f9d215965c19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Tue, 30 Dec 2025 16:20:11 +0100 Subject: [PATCH 10/18] Update test.yml --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 875429dd4..2865264bb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,8 +40,7 @@ jobs: include: # Full test run on Windows - - os: ubuntu-latest - # - os: windows-latest + - os: windows-latest python-version: "3.12" scikit-learn: "1.5.*" sklearn-only: "true" @@ -51,7 +50,7 @@ jobs: python-version: "3.12" scikit-learn: "1.5.*" sklearn-only: "true" - # code-cov: true + code-cov: true steps: - uses: actions/checkout@v6 From 9220d369d840b43d732366fd11f4e7c27671eae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Tue, 30 Dec 2025 16:21:50 +0100 Subject: [PATCH 11/18] Update test.yml --- .github/workflows/test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2865264bb..2ba950e8d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,6 +14,7 @@ on: branches: - main - develop + concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true From f5a13bbfd0d9aaf5298dfcf7c2bc46cc90c96b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Tue, 30 Dec 2025 16:26:52 +0100 Subject: [PATCH 12/18] Update test.yml --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2ba950e8d..b77cfd38c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,13 +44,13 @@ jobs: - os: windows-latest python-version: "3.12" scikit-learn: "1.5.*" - sklearn-only: "true" + sklearn-only: "false" # Coverage run - os: ubuntu-latest python-version: "3.12" scikit-learn: "1.5.*" - sklearn-only: "true" + sklearn-only: "false" code-cov: true steps: From b7401017c193a7581e889daecd73280b419da4bd Mon Sep 17 00:00:00 2001 From: JATAYU000 Date: Wed, 31 Dec 2025 11:10:44 +0530 Subject: [PATCH 13/18] More xfail skips --- tests/test_datasets/test_dataset.py | 2 ++ tests/test_datasets/test_dataset_functions.py | 8 ++++++-- tests/test_runs/test_run.py | 1 + tests/test_tasks/test_supervised_task.py | 2 ++ tests/test_tasks/test_task_functions.py | 2 +- tests/test_tasks/test_task_methods.py | 2 ++ 6 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index 86a4d3f57..58eea1f05 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -302,6 +302,7 @@ def test_get_feature_with_ontology_data_id_11(): assert len(dataset.features[2].ontologies) >= 1 assert len(dataset.features[3].ontologies) >= 1 +@pytest.mark.xfail(reason="failures_issue_1544") def test_add_remove_ontology_to_dataset(): did = 1 feature_index = 1 @@ -309,6 +310,7 @@ def test_add_remove_ontology_to_dataset(): openml.datasets.functions.data_feature_add_ontology(did, feature_index, ontology) openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology) 
+@pytest.mark.xfail(reason="failures_issue_1544") def test_add_same_ontology_multiple_features(): did = 1 ontology = "https://www.openml.org/unittest/" + str(time()) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index f63bd1534..1c2058d21 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -513,6 +513,7 @@ def test_deletion_of_cache_dir_faulty_download(self, patch): datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 + @pytest.mark.xfail(reason="failures_issue_1544") def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. openml.datasets.get_dataset(3, download_data=True) @@ -1389,6 +1390,7 @@ def test_get_dataset_cache_format_feather(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] + @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_non_critical_field(self): # Case 1 # All users can edit non-critical fields of datasets @@ -1410,6 +1412,7 @@ def test_data_edit_non_critical_field(self): edited_dataset = openml.datasets.get_dataset(did) assert edited_dataset.description == desc + @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_critical_field(self): # Case 2 # only owners (or admin) can edit all critical fields of datasets @@ -1458,6 +1461,7 @@ def test_data_edit_requires_valid_dataset(self): description="xor operation dataset", ) + @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): # Need to own a dataset to be able to edit meta-data # Will be creating a forked version of an existing dataset to allow the unit test user @@ -1533,7 +1537,7 @@ def test_list_datasets_with_high_size_parameter(self): (None, None, ["wrong", "sunny"]), ], ) -@pytest.mark.xfail(reason="failures_issue_1544") +@pytest.mark.xfail(reason="failures_issue_1544",strict=False) def test_invalid_attribute_validations( default_target_attribute, row_id_attribute, @@ -1595,7 +1599,7 @@ def test_invalid_attribute_validations( (None, None, ["outlook", "windy"]), ], ) -@pytest.mark.xfail(reason="failures_issue_1544") +@pytest.mark.xfail(reason="failures_issue_1544",strict=False) def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 088856450..7d60e35bd 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -25,6 +25,7 @@ class TestRun(TestBase): # Splitting not helpful, these test's don't rely on the server and take # less than 1 seconds + @pytest.mark.xfail(reason="failures_issue_1544") def test_tagging(self): runs = openml.runs.list_runs(size=1) assert not runs.empty, "Test server state is incorrect" diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index 9c90b7e03..48e036d3e 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -6,6 +6,7 @@ import pandas as pd from openml.tasks import get_task +import pytest from .test_task import OpenMLTaskTest @@ -27,6 +28,7 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() + @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self) -> tuple[pd.DataFrame, pd.Series]: task = 
get_task(self.task_id) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 0e9b5fdce..25dab05ea 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -100,7 +100,6 @@ def test_list_tasks(self): for task in tasks.to_dict(orient="index").values(): self._check_task(task) - @pytest.mark.xfail(reason="failures_issue_1544") def test_list_tasks_paginate(self): size = 10 max = 100 @@ -177,6 +176,7 @@ def test_get_task_lazy(self): ) @mock.patch("openml.tasks.functions.get_dataset") + @pytest.mark.xfail(reason="failures_issue_1544") def test_removal_upon_download_failure(self, get_dataset): class WeirdException(Exception): pass diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 4480c2cbc..65c4ac37c 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -5,6 +5,7 @@ import openml from openml.testing import TestBase +import pytest # Common methods between tasks @@ -15,6 +16,7 @@ def setUp(self): def tearDown(self): super().tearDown() + @pytest.mark.xfail(reason="failures_issue_1544") def test_tagging(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # tags can be at most 64 alphanumeric (+ underscore) chars From 7179d4b9e63b5949006693eb0bf6c68d2cb7dd7e Mon Sep 17 00:00:00 2001 From: JATAYU000 Date: Wed, 31 Dec 2025 11:13:06 +0530 Subject: [PATCH 14/18] Only remove newly added --- openml/tasks/functions.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py index d2bf5e946..e9b879ae4 100644 --- a/openml/tasks/functions.py +++ b/openml/tasks/functions.py @@ -415,8 +415,9 @@ def get_task( if not isinstance(task_id, int): raise TypeError(f"Task id should be integer, is {type(task_id)}") - tid_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) - + cache_key_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id) + tid_cache_dir = cache_key_dir / str(task_id) + tid_cache_dir_existed = tid_cache_dir.exists() try: task = _get_task_description(task_id) dataset = get_dataset(task.dataset_id, **get_dataset_kwargs) @@ -430,7 +431,8 @@ def get_task( if download_splits and isinstance(task, OpenMLSupervisedTask): task.download_split() except Exception as e: - openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) + if not tid_cache_dir_existed: + openml.utils._remove_cache_dir_for_id(TASKS_CACHE_DIR_NAME, tid_cache_dir) raise e return task From 9889bbf23b171e9395c4c24e95009e223e591db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 31 Dec 2025 10:26:14 +0100 Subject: [PATCH 15/18] Update test_run_functions.py --- tests/test_runs/test_run_functions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 3bb4b0a0c..37f478fa7 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -822,7 +822,7 @@ def test_run_and_upload_gridsearch(self): assert len(run.trace.trace_iterations) == 9 @pytest.mark.sklearn() - @pytest.mark.xfail(reason="failures_issue_1544") + @pytest.mark.skip(reason="failures_issue_1544") def test_run_and_upload_randomsearch(self): randomsearch = RandomizedSearchCV( RandomForestClassifier(n_estimators=5), From ed894c1fbd05a33b71369c7a5da18aecca65761c Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 31 Dec 2025 10:52:47 +0100 Subject: [PATCH 16/18] Update test_run_functions.py --- tests/test_runs/test_run_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 3988b0037..144abb6a7 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -792,6 +792,7 @@ def test_run_and_upload_knn_pipeline(self, warnings_mock): call_count += 1 assert call_count == 3 + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() def test_run_and_upload_gridsearch(self): estimator_name = ( @@ -847,6 +848,7 @@ def test_run_and_upload_randomsearch(self): trace = openml.runs.get_run_trace(run.run_id) assert len(trace.trace_iterations) == 5 + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() def test_run_and_upload_maskedarrays(self): # This testcase is important for 2 reasons: From 725cbce95d42c5dd946171e24c7091315a9256e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= Date: Wed, 31 Dec 2025 11:06:17 +0100 Subject: [PATCH 17/18] Update test_run_functions.py --- tests/test_runs/test_run_functions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 144abb6a7..645e008db 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -625,6 +625,7 @@ def _run_and_upload_regression( sentinel=sentinel, ) + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() def test_run_and_upload_logistic_regression(self): lr = LogisticRegression(solver="lbfgs", max_iter=1000) @@ -633,6 +634,7 @@ def test_run_and_upload_logistic_regression(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() def test_run_and_upload_linear_regression(self): lr = LinearRegression() @@ -663,6 +665,7 @@ def test_run_and_upload_linear_regression(self): n_test_obs = self.TEST_SERVER_TASK_REGRESSION["n_test_obs"] self._run_and_upload_regression(lr, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() def test_run_and_upload_pipeline_dummy_pipeline(self): pipeline1 = Pipeline( @@ -676,6 +679,7 @@ def test_run_and_upload_pipeline_dummy_pipeline(self): n_test_obs = self.TEST_SERVER_TASK_SIMPLE["n_test_obs"] self._run_and_upload_classification(pipeline1, task_id, n_missing_vals, n_test_obs, "62501") + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() @unittest.skipIf( Version(sklearn.__version__) < Version("0.20"), @@ -740,6 +744,7 @@ def get_ct_cf(nominal_indices, numeric_indices): sentinel=sentinel, ) + @pytest.mark.skip(reason="failures_issue_1544") @pytest.mark.sklearn() @unittest.skip("https://github.com/openml/OpenML/issues/1180") @unittest.skipIf( From b1e06ecc0a49f853af851f0dbc1da652549d1112 Mon Sep 17 00:00:00 2001 From: JATAYU000 Date: Wed, 31 Dec 2025 16:19:33 +0530 Subject: [PATCH 18/18] Revert all xfails which passed --- tests/test_datasets/test_dataset.py | 2 -- tests/test_datasets/test_dataset_functions.py | 20 ------------------- tests/test_runs/test_run.py | 1 - tests/test_runs/test_run_functions.py | 4 ---- tests/test_setups/test_setup_functions.py | 1 - tests/test_study/test_study_functions.py | 2 -- 
tests/test_tasks/test_classification_task.py | 3 --- tests/test_tasks/test_learning_curve_task.py | 3 --- tests/test_tasks/test_regression_task.py | 1 - tests/test_tasks/test_supervised_task.py | 1 - tests/test_tasks/test_task_functions.py | 4 ---- tests/test_tasks/test_task_methods.py | 1 - 12 files changed, 43 deletions(-) diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index bf570cd3c..66e9b8554 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -303,7 +303,6 @@ def test_get_feature_with_ontology_data_id_11(): assert len(dataset.features[2].ontologies) >= 1 assert len(dataset.features[3].ontologies) >= 1 -@pytest.mark.xfail(reason="failures_issue_1544") def test_add_remove_ontology_to_dataset(): did = 1 feature_index = 1 @@ -311,7 +310,6 @@ def test_add_remove_ontology_to_dataset(): openml.datasets.functions.data_feature_add_ontology(did, feature_index, ontology) openml.datasets.functions.data_feature_remove_ontology(did, feature_index, ontology) -@pytest.mark.xfail(reason="failures_issue_1544") def test_add_same_ontology_multiple_features(): did = 1 ontology = "https://www.openml.org/unittest/" + str(time()) diff --git a/tests/test_datasets/test_dataset_functions.py b/tests/test_datasets/test_dataset_functions.py index 1c2058d21..266a6f6f7 100644 --- a/tests/test_datasets/test_dataset_functions.py +++ b/tests/test_datasets/test_dataset_functions.py @@ -244,7 +244,6 @@ def test_get_datasets(self): assert len(datasets) == 2 _assert_datasets_retrieved_successfully([1, 2]) - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_by_name(self): dataset = openml.datasets.get_dataset("anneal") assert type(dataset) == OpenMLDataset @@ -263,7 +262,6 @@ def test_get_dataset_download_all_files(self): # test_get_dataset_lazy raise NotImplementedError - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_uint8_dtype(self): dataset = openml.datasets.get_dataset(1) assert type(dataset) == OpenMLDataset @@ -282,7 +280,6 @@ def test_dataset_by_name_cannot_access_private_data(self): self.use_production_server() self.assertRaises(OpenMLPrivateDatasetError, openml.datasets.get_dataset, "NAME_GOES_HERE") - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_lazy_all_functions(self): """Test that all expected functionality is available without downloading the dataset.""" dataset = openml.datasets.get_dataset(1) @@ -312,7 +309,6 @@ def ensure_absence_of_real_data(): assert classes == ["1", "2", "3", "4", "5", "U"] ensure_absence_of_real_data() - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_sparse(self): dataset = openml.datasets.get_dataset(102) X, *_ = dataset.get_data() @@ -331,7 +327,6 @@ def test__get_dataset_description(self): description_xml_path = os.path.join(self.workdir, "description.xml") assert os.path.exists(description_xml_path) - @pytest.mark.xfail(reason="failures_issue_1544") def test__getarff_path_dataset_arff(self): openml.config.set_root_cache_directory(self.static_cache_dir) description = _get_dataset_description(self.workdir, 2) @@ -435,14 +430,12 @@ def test__getarff_md5_issue(self): openml.config.connection_n_retries = n - @pytest.mark.xfail(reason="failures_issue_1544") def test__get_dataset_features(self): features_file = _get_dataset_features_file(self.workdir, 2) assert isinstance(features_file, Path) features_xml_path = self.workdir / "features.xml" assert features_xml_path.exists() - 
@pytest.mark.xfail(reason="failures_issue_1544") def test__get_dataset_qualities(self): qualities = _get_dataset_qualities_file(self.workdir, 2) assert isinstance(qualities, Path) @@ -513,7 +506,6 @@ def test_deletion_of_cache_dir_faulty_download(self, patch): datasets_cache_dir = os.path.join(self.workdir, "org", "openml", "test", "datasets") assert len(os.listdir(datasets_cache_dir)) == 0 - @pytest.mark.xfail(reason="failures_issue_1544") def test_publish_dataset(self): # lazy loading not possible as we need the arff-file. openml.datasets.get_dataset(3, download_data=True) @@ -861,7 +853,6 @@ def test_create_invalid_dataset(self): param["data"] = data[0] self.assertRaises(ValueError, create_dataset, **param) - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_online_dataset_arff(self): dataset_id = 100 # Australian # lazy loading not used as arff file is checked. @@ -1341,7 +1332,6 @@ def test_list_qualities(self): assert isinstance(qualities, list) is True assert all(isinstance(q, str) for q in qualities) is True - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_cache_format_pickle(self): dataset = openml.datasets.get_dataset(1) dataset.get_data() @@ -1357,7 +1347,6 @@ def test_get_dataset_cache_format_pickle(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_cache_format_feather(self): # This test crashed due to using the parquet file by default, which is downloaded # from minio. However, there is a mismatch between OpenML test server and minio IDs. @@ -1390,7 +1379,6 @@ def test_get_dataset_cache_format_feather(self): assert len(categorical) == X.shape[1] assert len(attribute_names) == X.shape[1] - @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_non_critical_field(self): # Case 1 # All users can edit non-critical fields of datasets @@ -1412,7 +1400,6 @@ def test_data_edit_non_critical_field(self): edited_dataset = openml.datasets.get_dataset(did) assert edited_dataset.description == desc - @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_critical_field(self): # Case 2 # only owners (or admin) can edit all critical fields of datasets @@ -1461,7 +1448,6 @@ def test_data_edit_requires_valid_dataset(self): description="xor operation dataset", ) - @pytest.mark.xfail(reason="failures_issue_1544") def test_data_edit_cannot_edit_critical_field_if_dataset_has_task(self): # Need to own a dataset to be able to edit meta-data # Will be creating a forked version of an existing dataset to allow the unit test user @@ -1537,7 +1523,6 @@ def test_list_datasets_with_high_size_parameter(self): (None, None, ["wrong", "sunny"]), ], ) -@pytest.mark.xfail(reason="failures_issue_1544",strict=False) def test_invalid_attribute_validations( default_target_attribute, row_id_attribute, @@ -1599,7 +1584,6 @@ def test_invalid_attribute_validations( (None, None, ["outlook", "windy"]), ], ) -@pytest.mark.xfail(reason="failures_issue_1544",strict=False) def test_valid_attribute_validations(default_target_attribute, row_id_attribute, ignore_attribute): data = [ ["a", "sunny", 85.0, 85.0, "FALSE", "no"], @@ -1818,7 +1802,6 @@ def test_list_datasets_by_number_instances(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(small_datasets) -@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_by_number_features(all_datasets: pd.DataFrame): wide_datasets = 
openml.datasets.list_datasets(number_features="50..100") assert 8 <= len(wide_datasets) < len(all_datasets) @@ -1831,14 +1814,12 @@ def test_list_datasets_by_number_classes(all_datasets: pd.DataFrame): _assert_datasets_have_id_and_valid_status(five_class_datasets) -@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_by_number_missing_values(all_datasets: pd.DataFrame): na_datasets = openml.datasets.list_datasets(number_missing_values="5..100") assert 5 <= len(na_datasets) < len(all_datasets) _assert_datasets_have_id_and_valid_status(na_datasets) -@pytest.mark.xfail(reason="failures_issue_1544") def test_list_datasets_combined_filters(all_datasets: pd.DataFrame): combined_filter_datasets = openml.datasets.list_datasets( tag="study_14", @@ -1911,7 +1892,6 @@ def isolate_for_test(): ("with_data", "with_qualities", "with_features"), itertools.product([True, False], repeat=3), ) -@pytest.mark.xfail(reason="failures_issue_1544") def test_get_dataset_lazy_behavior( isolate_for_test, with_data: bool, with_qualities: bool, with_features: bool ): diff --git a/tests/test_runs/test_run.py b/tests/test_runs/test_run.py index 6fbe166ff..034b731aa 100644 --- a/tests/test_runs/test_run.py +++ b/tests/test_runs/test_run.py @@ -25,7 +25,6 @@ class TestRun(TestBase): # Splitting not helpful, these test's don't rely on the server and take # less than 1 seconds - @pytest.mark.xfail(reason="failures_issue_1544") def test_tagging(self): runs = openml.runs.list_runs(size=1) assert not runs.empty, "Test server state is incorrect" diff --git a/tests/test_runs/test_run_functions.py b/tests/test_runs/test_run_functions.py index 645e008db..e4cec56ab 100644 --- a/tests/test_runs/test_run_functions.py +++ b/tests/test_runs/test_run_functions.py @@ -1678,7 +1678,6 @@ def test_format_prediction_non_supervised(self): ): format_prediction(clustering, *ignored_input) - @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_classification_no_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1688,7 +1687,6 @@ def test_format_prediction_classification_no_probabilities(self): with pytest.raises(ValueError, match="`proba` is required for classification task"): format_prediction(classification, *ignored_input, proba=None) - @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_classification_incomplete_probabilities(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1699,7 +1697,6 @@ def test_format_prediction_classification_incomplete_probabilities(self): with pytest.raises(ValueError, match="Each class should have a predicted probability"): format_prediction(classification, *ignored_input, proba=incomplete_probabilities) - @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_task_without_classlabels_set(self): classification = openml.tasks.get_task( self.TEST_SERVER_TASK_SIMPLE["task_id"], @@ -1710,7 +1707,6 @@ def test_format_prediction_task_without_classlabels_set(self): with pytest.raises(ValueError, match="The classification task must have class labels set"): format_prediction(classification, *ignored_input, proba={}) - @pytest.mark.xfail(reason="failures_issue_1544") def test_format_prediction_task_learning_curve_sample_not_set(self): learning_curve = openml.tasks.get_task(801, download_data=False) # diabetes;crossvalidation probabilities = {c: 0.2 for c in learning_curve.class_labels} diff --git a/tests/test_setups/test_setup_functions.py 
b/tests/test_setups/test_setup_functions.py index d371f6588..42af5362b 100644 --- a/tests/test_setups/test_setup_functions.py +++ b/tests/test_setups/test_setup_functions.py @@ -163,7 +163,6 @@ def test_list_setups_output_format(self): assert isinstance(setups, pd.DataFrame) assert len(setups) == 10 - @pytest.mark.xfail(reason="failures_issue_1544") def test_setuplist_offset(self): size = 10 setups = openml.setups.list_setups(offset=0, size=size) diff --git a/tests/test_study/test_study_functions.py b/tests/test_study/test_study_functions.py index 837feb5bb..40026592f 100644 --- a/tests/test_study/test_study_functions.py +++ b/tests/test_study/test_study_functions.py @@ -148,7 +148,6 @@ def test_publish_empty_study_implicit(self): self._test_publish_empty_study_is_allowed(explicit=False) @pytest.mark.flaky() - @pytest.mark.xfail(reason="failures_issue_1544") def test_publish_study(self): # get some random runs to attach run_list = openml.evaluations.list_evaluations("predictive_accuracy", size=10) @@ -218,7 +217,6 @@ def test_publish_study(self): res = openml.study.delete_study(study.id) assert res - @pytest.mark.xfail(reason="failures_issue_1544") def test_study_attach_illegal(self): run_list = openml.runs.list_runs(size=10) assert len(run_list) == 10 diff --git a/tests/test_tasks/test_classification_task.py b/tests/test_tasks/test_classification_task.py index 70c3115e5..d4f2ed9d7 100644 --- a/tests/test_tasks/test_classification_task.py +++ b/tests/test_tasks/test_classification_task.py @@ -18,7 +18,6 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_CLASSIFICATION self.estimation_procedure = 5 - @pytest.mark.xfail(reason="failures_issue_1544") def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id @@ -26,14 +25,12 @@ def test_download_task(self): assert task.dataset_id == 20 assert task.estimation_procedure_id == self.estimation_procedure - @pytest.mark.xfail(reason="failures_issue_1544") def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] @pytest.mark.server() -@pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(): task = get_task(119) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_learning_curve_task.py b/tests/test_tasks/test_learning_curve_task.py index f8ed876cc..4a3dede4e 100644 --- a/tests/test_tasks/test_learning_curve_task.py +++ b/tests/test_tasks/test_learning_curve_task.py @@ -18,7 +18,6 @@ def setUp(self, n_levels: int = 1): self.task_type = TaskType.LEARNING_CURVE self.estimation_procedure = 13 - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (768, 8) @@ -27,14 +26,12 @@ def test_get_X_and_Y(self): assert isinstance(Y, pd.Series) assert pd.api.types.is_categorical_dtype(Y) - @pytest.mark.xfail(reason="failures_issue_1544") def test_download_task(self): task = super().test_download_task() assert task.task_id == self.task_id assert task.task_type_id == TaskType.LEARNING_CURVE assert task.dataset_id == 20 - @pytest.mark.xfail(reason="failures_issue_1544") def test_class_labels(self): task = get_task(self.task_id) assert task.class_labels == ["tested_negative", "tested_positive"] diff --git a/tests/test_tasks/test_regression_task.py b/tests/test_tasks/test_regression_task.py index 5c3e79061..3e324c4f8 100644 --- a/tests/test_tasks/test_regression_task.py +++ b/tests/test_tasks/test_regression_task.py @@ -49,7 +49,6 @@ 
def setUp(self, n_levels: int = 1): self.task_type = TaskType.SUPERVISED_REGRESSION - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self): X, Y = super().test_get_X_and_Y() assert X.shape == (194, 32) diff --git a/tests/test_tasks/test_supervised_task.py b/tests/test_tasks/test_supervised_task.py index 48e036d3e..e5a17a72b 100644 --- a/tests/test_tasks/test_supervised_task.py +++ b/tests/test_tasks/test_supervised_task.py @@ -28,7 +28,6 @@ def setUpClass(cls): def setUp(self, n_levels: int = 1): super().setUp() - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_X_and_Y(self) -> tuple[pd.DataFrame, pd.Series]: task = get_task(self.task_id) X, Y = task.get_X_and_y() diff --git a/tests/test_tasks/test_task_functions.py b/tests/test_tasks/test_task_functions.py index 25dab05ea..0aa2dcc9b 100644 --- a/tests/test_tasks/test_task_functions.py +++ b/tests/test_tasks/test_task_functions.py @@ -139,7 +139,6 @@ def test__get_task_live(self): # https://github.com/openml/openml-python/issues/378 openml.tasks.get_task(34536) - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task(self): task = openml.tasks.get_task(1, download_data=True) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -153,7 +152,6 @@ def test_get_task(self): os.path.join(self.workdir, "org", "openml", "test", "datasets", "1", "dataset.arff") ) - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task_lazy(self): task = openml.tasks.get_task(2, download_data=False) # anneal; crossvalidation assert isinstance(task, OpenMLTask) @@ -194,7 +192,6 @@ def assert_and_raise(*args, **kwargs): # Now the file should no longer exist assert not os.path.exists(os.path.join(os.getcwd(), "tasks", "1", "tasks.xml")) - @pytest.mark.xfail(reason="failures_issue_1544") def test_get_task_with_cache(self): openml.config.set_root_cache_directory(self.static_cache_dir) task = openml.tasks.get_task(1) @@ -210,7 +207,6 @@ def test_get_task_different_types(self): # Issue 538, get_task failing with clustering task. openml.tasks.functions.get_task(126033) - @pytest.mark.xfail(reason="failures_issue_1544") def test_download_split(self): task = openml.tasks.get_task(1) # anneal; crossvalidation split = task.download_split() diff --git a/tests/test_tasks/test_task_methods.py b/tests/test_tasks/test_task_methods.py index 65c4ac37c..540c43de0 100644 --- a/tests/test_tasks/test_task_methods.py +++ b/tests/test_tasks/test_task_methods.py @@ -16,7 +16,6 @@ def setUp(self): def tearDown(self): super().tearDown() - @pytest.mark.xfail(reason="failures_issue_1544") def test_tagging(self): task = openml.tasks.get_task(1) # anneal; crossvalidation # tags can be at most 64 alphanumeric (+ underscore) chars
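
Note (illustrative, not part of the patches above): the series leans on the difference between pytest's xfail and skip markers for the issue-1544 failures. xfail still executes the test, so a real failure is reported as XFAIL and an unexpected pass as XPASS, which is what allows the bulk revert of passing tests in PATCH 18; skip (used in PATCH 15-17) never runs the test at all. A minimal, self-contained sketch of these semantics, with hypothetical test names:

    import pytest

    @pytest.mark.xfail(reason="failures_issue_1544")
    def test_known_failure_reported_as_xfail():
        # Executed; the failing assert is recorded as XFAIL, not as a test error.
        assert 1 == 2

    @pytest.mark.xfail(reason="failures_issue_1544", strict=False)
    def test_unexpected_pass_reported_as_xpass():
        # strict=False keeps an unexpected pass (XPASS) from failing the run,
        # even when xfail_strict = true is set in the pytest configuration.
        assert 1 == 1

    @pytest.mark.skip(reason="failures_issue_1544")
    def test_never_executed():
        # Never runs, so it can neither fail nor surface an unexpected pass.
        raise RuntimeError("unreachable")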