Skip to content

Commit 38c0ddd

Browse files
authored
Merge pull request #51 from TaskarCenterAtUW/develop
[0.3.2] Updated unit test cases
2 parents c84ea45 + 94acdba commit 38c0ddd

File tree

7 files changed

+248
-21
lines changed

7 files changed

+248
-21
lines changed

CHANGELOG.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
# Change log
22

3-
### 0.3.1 - 2025-02-12
3+
### 0.3.2 - 2025-12-15
4+
- Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths.
5+
- Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests.
6+
- Added a detailed scenario summary (`unit_test_overview.md`) and renamed log-oriented tests for clarity; removed noisy traceback printing on unexpected validation errors.
7+
8+
### 0.3.1 - 2025-12-15
49
- Restored custom field support across schemas (CustomNode/Edge/Zone) while aligning required fields with original behavior.
510
- Relaxed edges/zones global required lists to let custom features omit `highway` (kept in branch-specific schemas).
611
- Enhanced 0.2 compatibility guard: allow `ext:` on nodes, block custom content per dataset with specific messages (e.g., Custom Edge/Point/Polygon), and reject non-point `ext:`.

src/python_osw_validation/extracted_data_validator.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,16 @@ def is_valid(self) -> bool:
9898
gc.collect()
9999
return True
100100

101+
allowed_keys = tuple(OSW_DATASET_FILES.keys())
102+
unsupported_files = sorted(
103+
{bn for bn in basenames if not any(key in bn for key in allowed_keys)}
104+
)
105+
if unsupported_files:
106+
allowed_fmt = ", ".join(allowed_keys)
107+
self.error = (f"Unsupported .geojson files present: {', '.join(unsupported_files)}. "
108+
f"Allowed file types are {{{allowed_fmt}}}")
109+
return False
110+
101111
required_files = [key for key, value in OSW_DATASET_FILES.items() if value['required']]
102112
optional_files = [key for key, value in OSW_DATASET_FILES.items() if not value['required']]
103113
missing_files = []
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.3.1'
1+
__version__ = '0.3.2'

tests/unit_tests/test_extracted_data_validator.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,16 @@ def test_duplicate_required_files_detected(self):
9999
self.assertFalse(validator.is_valid())
100100
self.assertEqual(validator.error, 'Multiple .geojson files of the same type found: nodes.')
101101

102+
def test_unsupported_files_are_rejected(self):
103+
validator = ExtractedDataValidator(self.test_dir)
104+
self.create_files(['a.nodes.geojson', 'something_else.geojson'])
105+
self.assertFalse(validator.is_valid())
106+
self.assertEqual(
107+
validator.error,
108+
'Unsupported .geojson files present: something_else.geojson. '
109+
'Allowed file types are {edges, nodes, points, lines, zones, polygons}'
110+
)
111+
102112

103113
if __name__ == '__main__':
104114
unittest.main()

tests/unit_tests/test_osw_validation.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,22 +166,22 @@ def test_points_invalid_zipfile_with_invalid_schema(self):
166166
def test_external_extension_file_inside_zipfile(self):
167167
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile)
168168
result = validation.validate()
169-
self.assertTrue(result.is_valid)
170-
self.assertIsNone(result.errors)
169+
self.assertFalse(result.is_valid)
170+
self.assertIsNotNone(result.errors)
171171

172172
def test_external_extension_file_inside_zipfile_with_schema(self):
173173
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile,
174174
schema_paths=self.schema_paths)
175175
result = validation.validate()
176-
self.assertTrue(result.is_valid)
177-
self.assertIsNone(result.errors)
176+
self.assertFalse(result.is_valid)
177+
self.assertIsNotNone(result.errors)
178178

179179
def test_external_extension_file_inside_zipfile_with_invalid_schema(self):
180180
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile,
181181
schema_file_path=self.invalid_schema_file_path)
182182
result = validation.validate()
183-
self.assertTrue(result.is_valid)
184-
self.assertIsNone(result.errors)
183+
self.assertFalse(result.is_valid)
184+
self.assertIsNotNone(result.errors)
185185

186186
def test_id_missing_zipfile(self):
187187
validation = OSWValidation(zipfile_path=self.id_missing_zipfile)

tests/unit_tests/test_osw_validation_extras.py

Lines changed: 137 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ def _fake_validator(self, files, external_exts=None, valid=True, error="folder i
7373

7474
# ---------------- tests ----------------
7575

76-
def test_missing_u_id_logged_and_no_keyerror(self):
77-
"""Edges missing `_u_id` should log a friendly error instead of raising KeyError."""
76+
def test_missing_u_id_reports_error_without_keyerror(self):
77+
"""Edges missing `_u_id` should report a friendly error instead of raising KeyError."""
7878
fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"]
7979
nodes = self._gdf_nodes([1, 2])
8080
# edges WITHOUT _u_id; include _id to bypass duplicated('_id') KeyError
@@ -175,7 +175,7 @@ def _rf(path):
175175
shown_ids = [x.strip() for x in displayed.split(",")]
176176
self.assertLessEqual(len(shown_ids), 20)
177177

178-
def test_load_osw_file_logs_json_decode_error(self):
178+
def test_load_osw_file_reports_json_decode_error(self):
179179
"""Invalid JSON should surface a detailed message with location context."""
180180
validator = OSWValidation(zipfile_path="dummy.zip")
181181
with tempfile.NamedTemporaryFile("w", suffix=".geojson", delete=False) as tmp:
@@ -200,7 +200,7 @@ def test_load_osw_file_logs_json_decode_error(self):
200200
self.assertIsNone(issue["feature_index"])
201201
self.assertEqual(issue["error_message"], message)
202202

203-
def test_load_osw_file_logs_os_error(self):
203+
def test_load_osw_file_reports_os_error(self):
204204
"""Missing files should log a readable OS error message."""
205205
validator = OSWValidation(zipfile_path="dummy.zip")
206206
missing_path = os.path.join(tempfile.gettempdir(), "nonexistent_osw_file.geojson")
@@ -252,7 +252,7 @@ def test_validate_reports_json_decode_error(self):
252252
finally:
253253
os.unlink(bad_path)
254254

255-
def test_validate_logs_read_file_exception(self):
255+
def test_validate_reports_read_file_exception(self):
256256
"""GeoDataFrame read failures are logged and do not crash."""
257257
fake_files = ["/tmp/edges.geojson"]
258258

@@ -274,8 +274,8 @@ def test_validate_logs_read_file_exception(self):
274274
self.assertTrue(any("Failed to read 'edges.geojson' as GeoJSON: boom" in e for e in (res.errors or [])),
275275
f"Errors were: {res.errors}")
276276

277-
def test_missing_w_id_logs_error(self):
278-
"""Zones missing _w_id should log a clear message."""
277+
def test_missing_w_id_reports_error(self):
278+
"""Zones missing _w_id should report a clear message."""
279279
fake_files = ["/tmp/nodes.geojson", "/tmp/zones.geojson"]
280280
nodes = self._gdf_nodes([1, 2])
281281
# zones without _w_id column
@@ -310,8 +310,8 @@ def _rf(path):
310310
self.assertTrue(any("Missing required column '_w_id' in zones." in e for e in (res.errors or [])),
311311
f"Errors were: {res.errors}")
312312

313-
def test_extension_read_failure_is_logged(self):
314-
"""Failure reading an extension file should be logged and skipped."""
313+
def test_extension_read_failure_reports_error(self):
314+
"""Failure reading an extension file should be reported and skipped."""
315315
fake_files = ["/tmp/nodes.geojson"]
316316
nodes = self._gdf_nodes([1])
317317
ext_path = "/tmp/custom.geojson"
@@ -346,8 +346,8 @@ def _rf(path):
346346
self.assertTrue(any("Failed to read extension 'custom.geojson' as GeoJSON: boom" in e for e in (res.errors or [])),
347347
f"Errors were: {res.errors}")
348348

349-
def test_extension_invalid_ids_logging_failure(self):
350-
"""If invalid extension features exist but id extraction fails, we log gracefully."""
349+
def test_extension_invalid_ids_reports_extraction_failure(self):
350+
"""If invalid extension features exist but id extraction fails, we report gracefully."""
351351
ext_path = "/tmp/custom.geojson"
352352
fake_files = ["/tmp/nodes.geojson"]
353353
nodes = self._gdf_nodes([1])
@@ -392,6 +392,94 @@ def _rf(path):
392392
for e in (res.errors or [])),
393393
f"Errors were: {res.errors}")
394394

395+
def test_extension_invalid_geometries_reported(self):
396+
"""Invalid geometries inside an extension file should surface a clear error."""
397+
ext_path = "/tmp/custom.geojson"
398+
fake_files = ["/tmp/nodes.geojson"]
399+
nodes = self._gdf_nodes([1])
400+
401+
invalid_geojson = MagicMock()
402+
invalid_geojson.__len__.return_value = 2
403+
invalid_geojson.get.return_value = ["a", "b"]
404+
405+
extension_file = MagicMock()
406+
extension_file.is_valid = [False, False]
407+
extension_file.__getitem__.return_value = invalid_geojson
408+
extension_file.drop.return_value = pd.DataFrame()
409+
410+
with patch(_PATCH_ZIP) as PZip, \
411+
patch(_PATCH_EV) as PVal, \
412+
patch(_PATCH_VALIDATE, return_value=True), \
413+
patch(_PATCH_READ_FILE) as PRead, \
414+
patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
415+
416+
z = MagicMock()
417+
z.extract_zip.return_value = "/tmp/extracted"
418+
z.remove_extracted_files.return_value = None
419+
PZip.return_value = z
420+
421+
val = self._fake_validator(fake_files, external_exts=[ext_path])
422+
PVal.return_value = val
423+
424+
def _rf(path):
425+
b = os.path.basename(path)
426+
if "nodes" in b:
427+
return nodes
428+
if os.path.basename(path) == os.path.basename(ext_path):
429+
return extension_file
430+
return gpd.GeoDataFrame()
431+
432+
PRead.side_effect = _rf
433+
434+
res = OSWValidation(zipfile_path="dummy.zip").validate()
435+
436+
self.assertFalse(res.is_valid)
437+
self.assertTrue(any("Invalid geometries found in extension file `custom.geojson`" in e
438+
for e in (res.errors or [])),
439+
f"Errors were: {res.errors}")
440+
441+
def test_extension_serialization_failure_reported(self):
442+
"""Non-serializable extension properties should be reported."""
443+
ext_path = "/tmp/custom.geojson"
444+
fake_files = ["/tmp/nodes.geojson"]
445+
nodes = self._gdf_nodes([1])
446+
447+
extension_file = MagicMock()
448+
extension_file.is_valid = [True]
449+
extension_file.__getitem__.return_value = gpd.GeoDataFrame() # no invalid geometries
450+
extension_file.drop.side_effect = Exception("serialize boom")
451+
452+
with patch(_PATCH_ZIP) as PZip, \
453+
patch(_PATCH_EV) as PVal, \
454+
patch(_PATCH_VALIDATE, return_value=True), \
455+
patch(_PATCH_READ_FILE) as PRead, \
456+
patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
457+
458+
z = MagicMock()
459+
z.extract_zip.return_value = "/tmp/extracted"
460+
z.remove_extracted_files.return_value = None
461+
PZip.return_value = z
462+
463+
val = self._fake_validator(fake_files, external_exts=[ext_path])
464+
PVal.return_value = val
465+
466+
def _rf(path):
467+
b = os.path.basename(path)
468+
if "nodes" in b:
469+
return nodes
470+
if os.path.basename(path) == os.path.basename(ext_path):
471+
return extension_file
472+
return gpd.GeoDataFrame()
473+
474+
PRead.side_effect = _rf
475+
476+
res = OSWValidation(zipfile_path="dummy.zip").validate()
477+
478+
self.assertFalse(res.is_valid)
479+
self.assertTrue(any("Extension file `custom.geojson` has non-serializable properties: serialize boom" in e
480+
for e in (res.errors or [])),
481+
f"Errors were: {res.errors}")
482+
395483
def test_duplicate_ids_detection(self):
396484
"""Duplicates inside a single file are reported."""
397485
fake_files = ["/tmp/nodes.geojson"]
@@ -554,7 +642,7 @@ def test_get_colset_returns_set_when_column_present(self):
554642
s = v._get_colset(gdf, "_id", "nodes")
555643
self.assertEqual(s, {1, 2, 3})
556644

557-
def test_get_colset_logs_and_returns_empty_when_missing(self):
645+
def test_get_colset_reports_missing_column(self):
558646
v = OSWValidation(zipfile_path="dummy.zip")
559647
gdf = self._gdf({"foo": [1, 2]}, geom="Point")
560648
s = v._get_colset(gdf, "_id", "nodes")
@@ -575,7 +663,7 @@ def test_get_colset_with_none_gdf(self):
575663
s = v._get_colset(None, "_id", "nodes")
576664
self.assertEqual(s, set())
577665

578-
def test_get_colset_logs_when_stringify_fails(self):
666+
def test_get_colset_reports_stringify_failure(self):
579667
class BadObj:
580668
def __hash__(self):
581669
raise TypeError("no hash")
@@ -590,6 +678,16 @@ def __str__(self):
590678
self.assertTrue(any("Could not create set for column 'meta' in nodes." in e for e in (v.errors or [])),
591679
f"Errors were: {v.errors}")
592680

681+
def test_load_osw_schema_reports_missing_file(self):
682+
v = OSWValidation(zipfile_path="dummy.zip")
683+
missing_schema = os.path.join(tempfile.gettempdir(), "missing_schema.json")
684+
if os.path.exists(missing_schema):
685+
os.unlink(missing_schema)
686+
with self.assertRaises(Exception):
687+
v.load_osw_schema(missing_schema)
688+
self.assertTrue(any("Invalid or missing schema file" in e for e in (v.errors or [])),
689+
f"Errors were: {v.errors}")
690+
593691
def test_schema_02_rejects_tree_and_custom(self):
594692
base = {
595693
"$schema": "https://sidewalks.washington.edu/opensidewalks/0.2/schema.json",
@@ -758,6 +856,32 @@ def test_pick_schema_force_single_schema_override(self):
758856
self.assertEqual(v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}), force)
759857
self.assertEqual(v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Point"}}]}), force)
760858

859+
def test_unexpected_exception_surfaces_unable_to_validate(self):
860+
"""Any unexpected exception should be surfaced via 'Unable to validate'."""
861+
fake_files = ["/tmp/nodes.geojson"]
862+
with patch(_PATCH_ZIP) as PZip, \
863+
patch(_PATCH_EV) as PVal, \
864+
patch(_PATCH_VALIDATE, side_effect=RuntimeError("boom")), \
865+
patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
866+
867+
z = MagicMock()
868+
z.extract_zip.return_value = "/tmp/extracted"
869+
z.remove_extracted_files.return_value = None
870+
PZip.return_value = z
871+
872+
val = MagicMock()
873+
val.files = fake_files
874+
val.externalExtensions = []
875+
val.is_valid.return_value = True
876+
val.error = None
877+
PVal.return_value = val
878+
879+
res = OSWValidation(zipfile_path="dummy.zip").validate()
880+
881+
self.assertFalse(res.is_valid)
882+
self.assertTrue(any("Unable to validate: boom" in e for e in (res.errors or [])),
883+
f"Errors were: {res.errors}")
884+
761885

762886
class TestOSWValidationInvalidGeometryLogging(unittest.TestCase):
763887
"""Covers the 'invalid geometries' logging branch, including _id-present and _id-missing fallback."""

0 commit comments

Comments
 (0)