Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Change log

### 0.3.1 - 2025-02-12
### 0.3.2 - 2025-12-15
- Reject non-OSW GeoJSON files (anything beyond points/nodes/edges/lines/polygons/zones) with a clear error; added unit coverage for unsupported files and extension validation paths.
- Clarified extension handling expectations: external extension ZIPs are now considered invalid inputs in tests.
- Added a detailed scenario summary (`unit_test_overview.md`) and renamed log-oriented tests for clarity; removed noisy traceback printing on unexpected validation errors.

### 0.3.1 - 2025-12-15
- Restored custom field support across schemas (CustomNode/Edge/Zone) while aligning required fields with original behavior.
- Relaxed edges/zones global required lists to let custom features omit `highway` (kept in branch-specific schemas).
- Enhanced 0.2 compatibility guard: allow `ext:` on nodes, block custom content per dataset with specific messages (e.g., Custom Edge/Point/Polygon), and reject non-point `ext:`.
Expand Down
10 changes: 10 additions & 0 deletions src/python_osw_validation/extracted_data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ def is_valid(self) -> bool:
gc.collect()
return True

allowed_keys = tuple(OSW_DATASET_FILES.keys())
unsupported_files = sorted(
{bn for bn in basenames if not any(key in bn for key in allowed_keys)}
)
if unsupported_files:
allowed_fmt = ", ".join(allowed_keys)
self.error = (f"Unsupported .geojson files present: {', '.join(unsupported_files)}. "
f"Allowed file types are {{{allowed_fmt}}}")
return False

required_files = [key for key, value in OSW_DATASET_FILES.items() if value['required']]
optional_files = [key for key, value in OSW_DATASET_FILES.items() if not value['required']]
missing_files = []
Expand Down
2 changes: 1 addition & 1 deletion src/python_osw_validation/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.3.1'
__version__ = '0.3.2'
10 changes: 10 additions & 0 deletions tests/unit_tests/test_extracted_data_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ def test_duplicate_required_files_detected(self):
self.assertFalse(validator.is_valid())
self.assertEqual(validator.error, 'Multiple .geojson files of the same type found: nodes.')

def test_unsupported_files_are_rejected(self):
    """A .geojson whose name matches no OSW dataset type must fail validation with a clear error."""
    subject = ExtractedDataValidator(self.test_dir)
    # One recognised file plus one stray file that matches no allowed dataset key.
    self.create_files(['a.nodes.geojson', 'something_else.geojson'])
    self.assertFalse(subject.is_valid())
    expected_error = ('Unsupported .geojson files present: something_else.geojson. '
                      'Allowed file types are {edges, nodes, points, lines, zones, polygons}')
    self.assertEqual(subject.error, expected_error)


if __name__ == '__main__':
unittest.main()
12 changes: 6 additions & 6 deletions tests/unit_tests/test_osw_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,22 +166,22 @@ def test_points_invalid_zipfile_with_invalid_schema(self):
def test_external_extension_file_inside_zipfile(self):
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile)
result = validation.validate()
self.assertTrue(result.is_valid)
self.assertIsNone(result.errors)
self.assertFalse(result.is_valid)
self.assertIsNotNone(result.errors)

def test_external_extension_file_inside_zipfile_with_schema(self):
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile,
schema_paths=self.schema_paths)
result = validation.validate()
self.assertTrue(result.is_valid)
self.assertIsNone(result.errors)
self.assertFalse(result.is_valid)
self.assertIsNotNone(result.errors)

def test_external_extension_file_inside_zipfile_with_invalid_schema(self):
validation = OSWValidation(zipfile_path=self.external_extension_file_zipfile,
schema_file_path=self.invalid_schema_file_path)
result = validation.validate()
self.assertTrue(result.is_valid)
self.assertIsNone(result.errors)
self.assertFalse(result.is_valid)
self.assertIsNotNone(result.errors)

def test_id_missing_zipfile(self):
validation = OSWValidation(zipfile_path=self.id_missing_zipfile)
Expand Down
150 changes: 137 additions & 13 deletions tests/unit_tests/test_osw_validation_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ def _fake_validator(self, files, external_exts=None, valid=True, error="folder i

# ---------------- tests ----------------

def test_missing_u_id_logged_and_no_keyerror(self):
"""Edges missing `_u_id` should log a friendly error instead of raising KeyError."""
def test_missing_u_id_reports_error_without_keyerror(self):
"""Edges missing `_u_id` should report a friendly error instead of raising KeyError."""
fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"]
nodes = self._gdf_nodes([1, 2])
# edges WITHOUT _u_id; include _id to bypass duplicated('_id') KeyError
Expand Down Expand Up @@ -175,7 +175,7 @@ def _rf(path):
shown_ids = [x.strip() for x in displayed.split(",")]
self.assertLessEqual(len(shown_ids), 20)

def test_load_osw_file_logs_json_decode_error(self):
def test_load_osw_file_reports_json_decode_error(self):
"""Invalid JSON should surface a detailed message with location context."""
validator = OSWValidation(zipfile_path="dummy.zip")
with tempfile.NamedTemporaryFile("w", suffix=".geojson", delete=False) as tmp:
Expand All @@ -200,7 +200,7 @@ def test_load_osw_file_logs_json_decode_error(self):
self.assertIsNone(issue["feature_index"])
self.assertEqual(issue["error_message"], message)

def test_load_osw_file_logs_os_error(self):
def test_load_osw_file_reports_os_error(self):
"""Missing files should log a readable OS error message."""
validator = OSWValidation(zipfile_path="dummy.zip")
missing_path = os.path.join(tempfile.gettempdir(), "nonexistent_osw_file.geojson")
Expand Down Expand Up @@ -252,7 +252,7 @@ def test_validate_reports_json_decode_error(self):
finally:
os.unlink(bad_path)

def test_validate_logs_read_file_exception(self):
def test_validate_reports_read_file_exception(self):
"""GeoDataFrame read failures are logged and do not crash."""
fake_files = ["/tmp/edges.geojson"]

Expand All @@ -274,8 +274,8 @@ def test_validate_logs_read_file_exception(self):
self.assertTrue(any("Failed to read 'edges.geojson' as GeoJSON: boom" in e for e in (res.errors or [])),
f"Errors were: {res.errors}")

def test_missing_w_id_logs_error(self):
"""Zones missing _w_id should log a clear message."""
def test_missing_w_id_reports_error(self):
"""Zones missing _w_id should report a clear message."""
fake_files = ["/tmp/nodes.geojson", "/tmp/zones.geojson"]
nodes = self._gdf_nodes([1, 2])
# zones without _w_id column
Expand Down Expand Up @@ -310,8 +310,8 @@ def _rf(path):
self.assertTrue(any("Missing required column '_w_id' in zones." in e for e in (res.errors or [])),
f"Errors were: {res.errors}")

def test_extension_read_failure_is_logged(self):
"""Failure reading an extension file should be logged and skipped."""
def test_extension_read_failure_reports_error(self):
"""Failure reading an extension file should be reported and skipped."""
fake_files = ["/tmp/nodes.geojson"]
nodes = self._gdf_nodes([1])
ext_path = "/tmp/custom.geojson"
Expand Down Expand Up @@ -346,8 +346,8 @@ def _rf(path):
self.assertTrue(any("Failed to read extension 'custom.geojson' as GeoJSON: boom" in e for e in (res.errors or [])),
f"Errors were: {res.errors}")

def test_extension_invalid_ids_logging_failure(self):
"""If invalid extension features exist but id extraction fails, we log gracefully."""
def test_extension_invalid_ids_reports_extraction_failure(self):
"""If invalid extension features exist but id extraction fails, we report gracefully."""
ext_path = "/tmp/custom.geojson"
fake_files = ["/tmp/nodes.geojson"]
nodes = self._gdf_nodes([1])
Expand Down Expand Up @@ -392,6 +392,94 @@ def _rf(path):
for e in (res.errors or [])),
f"Errors were: {res.errors}")

def test_extension_invalid_geometries_reported(self):
    """Invalid geometries inside an extension file should surface a clear error."""
    ext_path = "/tmp/custom.geojson"
    fake_files = ["/tmp/nodes.geojson"]
    nodes_frame = self._gdf_nodes([1])

    # Stub the frame of invalid features: two entries whose ids are "a" and "b".
    bad_features = MagicMock()
    bad_features.__len__.return_value = 2
    bad_features.get.return_value = ["a", "b"]

    # Extension GeoDataFrame stub: every row flagged geometrically invalid.
    broken_extension = MagicMock()
    broken_extension.is_valid = [False, False]
    broken_extension.__getitem__.return_value = bad_features
    broken_extension.drop.return_value = pd.DataFrame()

    with patch(_PATCH_ZIP) as zip_cls, \
         patch(_PATCH_EV) as validator_cls, \
         patch(_PATCH_VALIDATE, return_value=True), \
         patch(_PATCH_READ_FILE) as read_file, \
         patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):

        extractor = MagicMock()
        extractor.extract_zip.return_value = "/tmp/extracted"
        extractor.remove_extracted_files.return_value = None
        zip_cls.return_value = extractor

        validator_cls.return_value = self._fake_validator(
            fake_files, external_exts=[ext_path])

        def _route_read(path):
            # Dispatch reads: core nodes file, the broken extension, else empty.
            name = os.path.basename(path)
            if "nodes" in name:
                return nodes_frame
            if name == os.path.basename(ext_path):
                return broken_extension
            return gpd.GeoDataFrame()

        read_file.side_effect = _route_read

        res = OSWValidation(zipfile_path="dummy.zip").validate()

        self.assertFalse(res.is_valid)
        self.assertTrue(any("Invalid geometries found in extension file `custom.geojson`" in e
                            for e in (res.errors or [])),
                        f"Errors were: {res.errors}")

def test_extension_serialization_failure_reported(self):
    """Non-serializable extension properties should be reported."""
    ext_path = "/tmp/custom.geojson"
    fake_files = ["/tmp/nodes.geojson"]
    nodes_frame = self._gdf_nodes([1])

    # Extension stub: geometry is fine, but dropping columns for serialization blows up.
    failing_extension = MagicMock()
    failing_extension.is_valid = [True]
    failing_extension.__getitem__.return_value = gpd.GeoDataFrame()  # no invalid geometries
    failing_extension.drop.side_effect = Exception("serialize boom")

    with patch(_PATCH_ZIP) as zip_cls, \
         patch(_PATCH_EV) as validator_cls, \
         patch(_PATCH_VALIDATE, return_value=True), \
         patch(_PATCH_READ_FILE) as read_file, \
         patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):

        extractor = MagicMock()
        extractor.extract_zip.return_value = "/tmp/extracted"
        extractor.remove_extracted_files.return_value = None
        zip_cls.return_value = extractor

        validator_cls.return_value = self._fake_validator(
            fake_files, external_exts=[ext_path])

        def _route_read(path):
            # Dispatch reads: core nodes file, the failing extension, else empty.
            name = os.path.basename(path)
            if "nodes" in name:
                return nodes_frame
            if name == os.path.basename(ext_path):
                return failing_extension
            return gpd.GeoDataFrame()

        read_file.side_effect = _route_read

        res = OSWValidation(zipfile_path="dummy.zip").validate()

        self.assertFalse(res.is_valid)
        self.assertTrue(any("Extension file `custom.geojson` has non-serializable properties: serialize boom" in e
                            for e in (res.errors or [])),
                        f"Errors were: {res.errors}")

def test_duplicate_ids_detection(self):
"""Duplicates inside a single file are reported."""
fake_files = ["/tmp/nodes.geojson"]
Expand Down Expand Up @@ -554,7 +642,7 @@ def test_get_colset_returns_set_when_column_present(self):
s = v._get_colset(gdf, "_id", "nodes")
self.assertEqual(s, {1, 2, 3})

def test_get_colset_logs_and_returns_empty_when_missing(self):
def test_get_colset_reports_missing_column(self):
v = OSWValidation(zipfile_path="dummy.zip")
gdf = self._gdf({"foo": [1, 2]}, geom="Point")
s = v._get_colset(gdf, "_id", "nodes")
Expand All @@ -575,7 +663,7 @@ def test_get_colset_with_none_gdf(self):
s = v._get_colset(None, "_id", "nodes")
self.assertEqual(s, set())

def test_get_colset_logs_when_stringify_fails(self):
def test_get_colset_reports_stringify_failure(self):
class BadObj:
def __hash__(self):
raise TypeError("no hash")
Expand All @@ -590,6 +678,16 @@ def __str__(self):
self.assertTrue(any("Could not create set for column 'meta' in nodes." in e for e in (v.errors or [])),
f"Errors were: {v.errors}")

def test_load_osw_schema_reports_missing_file(self):
    """Loading a nonexistent schema file should raise and record a readable error."""
    checker = OSWValidation(zipfile_path="dummy.zip")
    absent = os.path.join(tempfile.gettempdir(), "missing_schema.json")
    # Guarantee the path really is absent before exercising the failure branch.
    if os.path.exists(absent):
        os.unlink(absent)
    with self.assertRaises(Exception):
        checker.load_osw_schema(absent)
    self.assertTrue(any("Invalid or missing schema file" in e for e in (checker.errors or [])),
                    f"Errors were: {checker.errors}")

def test_schema_02_rejects_tree_and_custom(self):
base = {
"$schema": "https://sidewalks.washington.edu/opensidewalks/0.2/schema.json",
Expand Down Expand Up @@ -758,6 +856,32 @@ def test_pick_schema_force_single_schema_override(self):
self.assertEqual(v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}), force)
self.assertEqual(v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Point"}}]}), force)

def test_unexpected_exception_surfaces_unable_to_validate(self):
    """Any unexpected exception should be surfaced via 'Unable to validate'."""
    fake_files = ["/tmp/nodes.geojson"]
    with patch(_PATCH_ZIP) as zip_cls, \
         patch(_PATCH_EV) as validator_cls, \
         patch(_PATCH_VALIDATE, side_effect=RuntimeError("boom")), \
         patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):

        extractor = MagicMock()
        extractor.extract_zip.return_value = "/tmp/extracted"
        extractor.remove_extracted_files.return_value = None
        zip_cls.return_value = extractor

        # Extracted-data validator stub that passes, so the RuntimeError from
        # the patched validate step is the only failure in play.
        fake_validator = MagicMock()
        fake_validator.files = fake_files
        fake_validator.externalExtensions = []
        fake_validator.is_valid.return_value = True
        fake_validator.error = None
        validator_cls.return_value = fake_validator

        res = OSWValidation(zipfile_path="dummy.zip").validate()

        self.assertFalse(res.is_valid)
        self.assertTrue(any("Unable to validate: boom" in e for e in (res.errors or [])),
                        f"Errors were: {res.errors}")


class TestOSWValidationInvalidGeometryLogging(unittest.TestCase):
"""Covers the 'invalid geometries' logging branch, including _id-present and _id-missing fallback."""
Expand Down
Loading