From 905e894e026d0ac37de42cbc31eae9f9b76a9e5f Mon Sep 17 00:00:00 2001 From: sujata-m Date: Mon, 15 Dec 2025 22:17:36 +0530 Subject: [PATCH] [0.3.2] Updated unit test cases - Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths. - Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests. - Added a detailed scenario summary (`unit_test_overview.md`) and renamed log-oriented tests for clarity; removed noisy traceback printing on unexpected validation errors. --- CHANGELOG.md | 7 +- .../extracted_data_validator.py | 10 ++ src/python_osw_validation/version.py | 2 +- .../test_extracted_data_validator.py | 10 ++ tests/unit_tests/test_osw_validation.py | 12 +- .../unit_tests/test_osw_validation_extras.py | 150 ++++++++++++++++-- unit_test_overview.md | 78 +++++++++ 7 files changed, 248 insertions(+), 21 deletions(-) create mode 100644 unit_test_overview.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6787619..6325fdc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,11 @@ # Change log -### 0.3.1 - 2025-02-12 +### 0.3.2 - 2025-12-15 +- Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths. +- Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests. +- Added a detailed scenario summary (`unit_test_overview.md`) and renamed log-oriented tests for clarity; removed noisy traceback printing on unexpected validation errors. + +### 0.3.1 - 2025-12-15 - Restored custom field support across schemas (CustomNode/Edge/Zone) while aligning required fields with original behavior. - Relaxed edges/zones global required lists to let custom features omit `highway` (kept in branch-specific schemas). - Enhanced 0.2 compatibility guard: allow `ext:` on nodes, block custom content per dataset with specific messages (e.g., Custom Edge/Point/Polygon), and reject non-point `ext:`. diff --git a/src/python_osw_validation/extracted_data_validator.py b/src/python_osw_validation/extracted_data_validator.py index 901205f..52abdf1 100644 --- a/src/python_osw_validation/extracted_data_validator.py +++ b/src/python_osw_validation/extracted_data_validator.py @@ -98,6 +98,16 @@ def is_valid(self) -> bool: gc.collect() return True + allowed_keys = tuple(OSW_DATASET_FILES.keys()) + unsupported_files = sorted( + {bn for bn in basenames if not any(key in bn for key in allowed_keys)} + ) + if unsupported_files: + allowed_fmt = ", ".join(allowed_keys) + self.error = (f"Unsupported .geojson files present: {', '.join(unsupported_files)}. " + f"Allowed file types are {{{allowed_fmt}}}") + return False + required_files = [key for key, value in OSW_DATASET_FILES.items() if value['required']] optional_files = [key for key, value in OSW_DATASET_FILES.items() if not value['required']] missing_files = [] diff --git a/src/python_osw_validation/version.py b/src/python_osw_validation/version.py index e1424ed..73e3bb4 100644 --- a/src/python_osw_validation/version.py +++ b/src/python_osw_validation/version.py @@ -1 +1 @@ -__version__ = '0.3.1' +__version__ = '0.3.2' diff --git a/tests/unit_tests/test_extracted_data_validator.py b/tests/unit_tests/test_extracted_data_validator.py index 24badcc..f6ee874 100644 --- a/tests/unit_tests/test_extracted_data_validator.py +++ b/tests/unit_tests/test_extracted_data_validator.py @@ -99,6 +99,16 @@ def test_duplicate_required_files_detected(self): self.assertFalse(validator.is_valid()) self.assertEqual(validator.error, 'Multiple .geojson files of the same type found: nodes.') + def test_unsupported_files_are_rejected(self): + validator = ExtractedDataValidator(self.test_dir) + self.create_files(['a.nodes.geojson', 'something_else.geojson']) + self.assertFalse(validator.is_valid()) + self.assertEqual( + validator.error, + 'Unsupported .geojson files present: something_else.geojson. ' + 'Allowed file types are {edges, nodes, points, lines, zones, polygons}' + ) + if __name__ == '__main__': unittest.main() diff --git a/tests/unit_tests/test_osw_validation.py b/tests/unit_tests/test_osw_validation.py index 13d3dde..167f92f 100644 --- a/tests/unit_tests/test_osw_validation.py +++ b/tests/unit_tests/test_osw_validation.py @@ -166,22 +166,22 @@ def test_points_invalid_zipfile_with_invalid_schema(self): def test_external_extension_file_inside_zipfile(self): validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile) result = validation.validate() - self.assertTrue(result.is_valid) - self.assertIsNone(result.errors) + self.assertFalse(result.is_valid) + self.assertIsNotNone(result.errors) def test_external_extension_file_inside_zipfile_with_schema(self): validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile, schema_paths=self.schema_paths) result = validation.validate() - self.assertTrue(result.is_valid) - self.assertIsNone(result.errors) + self.assertFalse(result.is_valid) + self.assertIsNotNone(result.errors) def test_external_extension_file_inside_zipfile_with_invalid_schema(self): validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile, schema_file_path=self.invalid_schema_file_path) result = validation.validate() - self.assertTrue(result.is_valid) - self.assertIsNone(result.errors) + self.assertFalse(result.is_valid) + self.assertIsNotNone(result.errors) def test_id_missing_zipfile(self): validation = OSWValidation(zipfile_path=self.id_missing_zipfile) diff --git a/tests/unit_tests/test_osw_validation_extras.py b/tests/unit_tests/test_osw_validation_extras.py index 0f4818a..3f7ef52 100644 --- a/tests/unit_tests/test_osw_validation_extras.py +++ b/tests/unit_tests/test_osw_validation_extras.py @@ -73,8 +73,8 @@ def _fake_validator(self, files, external_exts=None, valid=True, error="folder i # ---------------- tests ---------------- - def test_missing_u_id_logged_and_no_keyerror(self): - """Edges missing `_u_id` should log a friendly error instead of raising KeyError.""" + def test_missing_u_id_reports_error_without_keyerror(self): + """Edges missing `_u_id` should report a friendly error instead of raising KeyError.""" fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"] nodes = self._gdf_nodes([1, 2]) # edges WITHOUT _u_id; include _id to bypass duplicated('_id') KeyError @@ -175,7 +175,7 @@ def _rf(path): shown_ids = [x.strip() for x in displayed.split(",")] self.assertLessEqual(len(shown_ids), 20) - def test_load_osw_file_logs_json_decode_error(self): + def test_load_osw_file_reports_json_decode_error(self): """Invalid JSON should surface a detailed message with location context.""" validator = OSWValidation(zipfile_path="dummy.zip") with tempfile.NamedTemporaryFile("w", suffix=".geojson", delete=False) as tmp: @@ -200,7 +200,7 @@ def test_load_osw_file_logs_json_decode_error(self): self.assertIsNone(issue["feature_index"]) self.assertEqual(issue["error_message"], message) - def test_load_osw_file_logs_os_error(self): + def test_load_osw_file_reports_os_error(self): """Missing files should log a readable OS error message.""" validator = OSWValidation(zipfile_path="dummy.zip") missing_path = os.path.join(tempfile.gettempdir(), "nonexistent_osw_file.geojson") @@ -252,7 +252,7 @@ def test_validate_reports_json_decode_error(self): finally: os.unlink(bad_path) - def test_validate_logs_read_file_exception(self): + def test_validate_reports_read_file_exception(self): """GeoDataFrame read failures are logged and do not crash.""" fake_files = ["/tmp/edges.geojson"] @@ -274,8 +274,8 @@ def test_validate_logs_read_file_exception(self): self.assertTrue(any("Failed to read 'edges.geojson' as GeoJSON: boom" in e for e in (res.errors or [])), f"Errors were: {res.errors}") - def test_missing_w_id_logs_error(self): - """Zones missing _w_id should log a clear message.""" + def test_missing_w_id_reports_error(self): + """Zones missing _w_id should report a clear message.""" fake_files = ["/tmp/nodes.geojson", "/tmp/zones.geojson"] nodes = self._gdf_nodes([1, 2]) # zones without _w_id column @@ -310,8 +310,8 @@ def _rf(path): self.assertTrue(any("Missing required column '_w_id' in zones." in e for e in (res.errors or [])), f"Errors were: {res.errors}") - def test_extension_read_failure_is_logged(self): - """Failure reading an extension file should be logged and skipped.""" + def test_extension_read_failure_reports_error(self): + """Failure reading an extension file should be reported and skipped.""" fake_files = ["/tmp/nodes.geojson"] nodes = self._gdf_nodes([1]) ext_path = "/tmp/custom.geojson" @@ -346,8 +346,8 @@ def _rf(path): self.assertTrue(any("Failed to read extension 'custom.geojson' as GeoJSON: boom" in e for e in (res.errors or [])), f"Errors were: {res.errors}") - def test_extension_invalid_ids_logging_failure(self): - """If invalid extension features exist but id extraction fails, we log gracefully.""" + def test_extension_invalid_ids_reports_extraction_failure(self): + """If invalid extension features exist but id extraction fails, we report gracefully.""" ext_path = "/tmp/custom.geojson" fake_files = ["/tmp/nodes.geojson"] nodes = self._gdf_nodes([1]) @@ -392,6 +392,94 @@ def _rf(path): for e in (res.errors or [])), f"Errors were: {res.errors}") + def test_extension_invalid_geometries_reported(self): + """Invalid geometries inside an extension file should surface a clear error.""" + ext_path = "/tmp/custom.geojson" + fake_files = ["/tmp/nodes.geojson"] + nodes = self._gdf_nodes([1]) + + invalid_geojson = MagicMock() + invalid_geojson.__len__.return_value = 2 + invalid_geojson.get.return_value = ["a", "b"] + + extension_file = MagicMock() + extension_file.is_valid = [False, False] + extension_file.__getitem__.return_value = invalid_geojson + extension_file.drop.return_value = pd.DataFrame() + + with patch(_PATCH_ZIP) as PZip, \ + patch(_PATCH_EV) as PVal, \ + patch(_PATCH_VALIDATE, return_value=True), \ + patch(_PATCH_READ_FILE) as PRead, \ + patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES): + + z = MagicMock() + z.extract_zip.return_value = "/tmp/extracted" + z.remove_extracted_files.return_value = None + PZip.return_value = z + + val = self._fake_validator(fake_files, external_exts=[ext_path]) + PVal.return_value = val + + def _rf(path): + b = os.path.basename(path) + if "nodes" in b: + return nodes + if os.path.basename(path) == os.path.basename(ext_path): + return extension_file + return gpd.GeoDataFrame() + + PRead.side_effect = _rf + + res = OSWValidation(zipfile_path="dummy.zip").validate() + + self.assertFalse(res.is_valid) + self.assertTrue(any("Invalid geometries found in extension file `custom.geojson`" in e + for e in (res.errors or [])), + f"Errors were: {res.errors}") + + def test_extension_serialization_failure_reported(self): + """Non-serializable extension properties should be reported.""" + ext_path = "/tmp/custom.geojson" + fake_files = ["/tmp/nodes.geojson"] + nodes = self._gdf_nodes([1]) + + extension_file = MagicMock() + extension_file.is_valid = [True] + extension_file.__getitem__.return_value = gpd.GeoDataFrame() # no invalid geometries + extension_file.drop.side_effect = Exception("serialize boom") + + with patch(_PATCH_ZIP) as PZip, \ + patch(_PATCH_EV) as PVal, \ + patch(_PATCH_VALIDATE, return_value=True), \ + patch(_PATCH_READ_FILE) as PRead, \ + patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES): + + z = MagicMock() + z.extract_zip.return_value = "/tmp/extracted" + z.remove_extracted_files.return_value = None + PZip.return_value = z + + val = self._fake_validator(fake_files, external_exts=[ext_path]) + PVal.return_value = val + + def _rf(path): + b = os.path.basename(path) + if "nodes" in b: + return nodes + if os.path.basename(path) == os.path.basename(ext_path): + return extension_file + return gpd.GeoDataFrame() + + PRead.side_effect = _rf + + res = OSWValidation(zipfile_path="dummy.zip").validate() + + self.assertFalse(res.is_valid) + self.assertTrue(any("Extension file `custom.geojson` has non-serializable properties: serialize boom" in e + for e in (res.errors or [])), + f"Errors were: {res.errors}") + def test_duplicate_ids_detection(self): """Duplicates inside a single file are reported.""" fake_files = ["/tmp/nodes.geojson"] @@ -554,7 +642,7 @@ def test_get_colset_returns_set_when_column_present(self): s = v._get_colset(gdf, "_id", "nodes") self.assertEqual(s, {1, 2, 3}) - def test_get_colset_logs_and_returns_empty_when_missing(self): + def test_get_colset_reports_missing_column(self): v = OSWValidation(zipfile_path="dummy.zip") gdf = self._gdf({"foo": [1, 2]}, geom="Point") s = v._get_colset(gdf, "_id", "nodes") @@ -575,7 +663,7 @@ def test_get_colset_with_none_gdf(self): s = v._get_colset(None, "_id", "nodes") self.assertEqual(s, set()) - def test_get_colset_logs_when_stringify_fails(self): + def test_get_colset_reports_stringify_failure(self): class BadObj: def __hash__(self): raise TypeError("no hash") @@ -590,6 +678,16 @@ def __str__(self): self.assertTrue(any("Could not create set for column 'meta' in nodes." in e for e in (v.errors or [])), f"Errors were: {v.errors}") + def test_load_osw_schema_reports_missing_file(self): + v = OSWValidation(zipfile_path="dummy.zip") + missing_schema = os.path.join(tempfile.gettempdir(), "missing_schema.json") + if os.path.exists(missing_schema): + os.unlink(missing_schema) + with self.assertRaises(Exception): + v.load_osw_schema(missing_schema) + self.assertTrue(any("Invalid or missing schema file" in e for e in (v.errors or [])), + f"Errors were: {v.errors}") + def test_schema_02_rejects_tree_and_custom(self): base = { "$schema": "https://sidewalks.washington.edu/opensidewalks/0.2/schema.json", @@ -758,6 +856,32 @@ def test_pick_schema_force_single_schema_override(self): self.assertEqual(v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}), force) self.assertEqual(v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Point"}}]}), force) + def test_unexpected_exception_surfaces_unable_to_validate(self): + """Any unexpected exception should be surfaced via 'Unable to validate'.""" + fake_files = ["/tmp/nodes.geojson"] + with patch(_PATCH_ZIP) as PZip, \ + patch(_PATCH_EV) as PVal, \ + patch(_PATCH_VALIDATE, side_effect=RuntimeError("boom")), \ + patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES): + + z = MagicMock() + z.extract_zip.return_value = "/tmp/extracted" + z.remove_extracted_files.return_value = None + PZip.return_value = z + + val = MagicMock() + val.files = fake_files + val.externalExtensions = [] + val.is_valid.return_value = True + val.error = None + PVal.return_value = val + + res = OSWValidation(zipfile_path="dummy.zip").validate() + + self.assertFalse(res.is_valid) + self.assertTrue(any("Unable to validate: boom" in e for e in (res.errors or [])), + f"Errors were: {res.errors}") + class TestOSWValidationInvalidGeometryLogging(unittest.TestCase): """Covers the 'invalid geometries' logging branch, including _id-present and _id-missing fallback.""" diff --git a/unit_test_overview.md b/unit_test_overview.md new file mode 100644 index 0000000..36db5bb --- /dev/null +++ b/unit_test_overview.md @@ -0,0 +1,78 @@ +# Unit Test Scenario Counts + +Below is a breakdown of all test scenarios (functions named `test_`) across the suite, plus a short description of the main areas they cover. + +- **Total scenarios:** 116 + +| Test module | Scenarios | Focus | +| --- | ---: | --- | +| tests/test_schema_parity.py | 14 | Schema parity and customized-field handling | +| tests/unit_tests/test_extracted_data_validator.py | 11 | Folder/filename validation, duplicates/missing files, unsupported files | +| tests/unit_tests/test_helpers.py | 14 | Helper utilities (pretty messages, rankings, feature index extraction) | +| tests/unit_tests/test_osw_validation.py | 34 | End-to-end ZIP validation across assets (valid/invalid, schemas, serialization, unmatched IDs) | +| tests/unit_tests/test_osw_validation_extras.py | 34 | Edge cases: foreign-key checks, duplicate IDs, invalid geometries, extension file failures, JSON/OS errors, 0.2 disallowed content, unexpected exceptions | +| tests/unit_tests/test_schema_definitions.py | 1 | Schema definition smoke check | +| tests/unit_tests/test_schema_metadata.py | 3 | Metadata/heuristic schema selection | +| tests/unit_tests/test_zipfile_handler.py | 5 | Zip extraction/creation lifecycle | + +Method used: counted functions matching `def test_` within `tests/`. + +## Detail for heavy-hitter suites + +### tests/unit_tests/test_osw_validation.py (34 scenarios) +- Zip validation success/failure paths: `test_valid_zipfile`, `test_valid_zipfile_with_schema`, `test_valid_zipfile_with_invalid_schema`, `test_minimal_zipfile*`, `test_invalid_zipfile*` (with/without schemas, error cap checks). +- Dataset-specific invalid archives: `test_nodes_invalid_zipfile*`, `test_edges_invalid_zipfile*`, `test_points_invalid_zipfile*`. +- External extensions: `test_external_extension_file_inside_zipfile*` (default/override/invalid schema). +- Schema-level failures: missing `_id` (`test_id_missing_zipfile`), extra fields, invalid geometry, missing identifiers, no entities, wrong datatypes. +- Zones coverage: `test_valid_zones_file`, `test_invalid_zones_file`. +- Serialization error surfaced: `test_invalid_serialization_file`. +- Foreign-key cap: `test_unmatched_ids_limited_to_20`. + +### tests/unit_tests/test_osw_validation_extras.py (34 scenarios) +- Foreign-key checks: `test_missing_u_id_reports_error_without_keyerror`, `test_unmatched_u_id_is_limited_to_20`, `test_unmatched_w_id_is_limited_to_20`. +- JSON/IO parsing: `test_load_osw_file_reports_json_decode_error`, `test_load_osw_file_reports_os_error`, `test_validate_reports_json_decode_error`. +- GeoDataFrame read failures: `test_validate_reports_read_file_exception`. +- Zones column presence: `test_missing_w_id_reports_error`. +- Extension handling: read failure, invalid IDs, invalid geometries, serialization failure (`test_extension_*` group). +- Duplicate IDs: `test_duplicate_ids_detection`, `test_duplicate_ids_detection_is_limited_to_20`. +- Invalid geometries: `_id` present vs missing with cap (`test_invalid_geometry_logs_*`). +- Helpers and selection: `test_pick_schema_*`, `_get_colset*`, `test_load_osw_schema_reports_missing_file`. +- 0.2 disallowed content vs 0.3 allowed: `test_schema_02_rejects_tree_and_custom`, `test_schema_03_with_tree_tags_is_allowed`. +- Robustness: cleanup handling, zip extract failure, invalid folder structure, unexpected exception path (`test_cleanup_handles_locals_membership_error`, `test_zip_extract_failure_bubbles_as_error`, `test_extracted_data_validator_invalid`, `test_unexpected_exception_surfaces_unable_to_validate`, `test_issues_populated_for_invalid_zip`). + +### tests/unit_tests/test_extracted_data_validator.py (11 scenarios) +- Valid layouts at root or nested, empty/invalid directories, and no-geojson detection. +- Duplicate detection and missing required files (patched OSW_DATASET_FILES). +- Non-standard filenames rejected; valid subsets of canonical OSW files accepted. +- Unsupported GeoJSON types (non OSW keys) are rejected. + +### tests/test_schema_parity.py (14 scenarios) +- Custom feature acceptance/rejection across nodes/edges/points/lines/polygons/zones. +- 0.2 vs 0.3 parity: custom tags rejected on 0.2 where appropriate; accepted on 0.3. +- Extension fields allowed/blocked per schema version. + +### tests/unit_tests/test_helpers.py (14 scenarios) +- Feature index extraction robustness across error shapes. +- Pretty-message compaction (enum, anyOf, ordering, trimming noise). +- Error ranking/tie-breaking and message selection helpers. + +### tests/unit_tests/test_schema_definitions.py (1 scenario) +- Ensures schema definitions include required fields (smoke check). + +### tests/unit_tests/test_schema_metadata.py (3 scenarios) +- Enforces schema requirement by geometry type (Point/LineString/Polygon) when schema URL missing. + +### tests/unit_tests/test_zipfile_handler.py (5 scenarios) +- Zip extraction success/failure, cleanup of extracted files. +- Zip creation happy path and failure path. + +## Coverage snapshot + +Name | Stmts | Miss | Cover +--- | ---: | ---: | ---: +src/python_osw_validation/__init__.py | 300 | 8 | 97% +src/python_osw_validation/extracted_data_validator.py | 90 | 1 | 99% +src/python_osw_validation/helpers.py | 54 | 2 | 96% +src/python_osw_validation/version.py | 1 | 0 | 100% +src/python_osw_validation/zipfile_handler.py | 48 | 1 | 98% +**TOTAL** | 493 | 12 | 98%