Skip to content

Commit 237ea18

Browse files
committed
- Updated schema file to OSW 0.2
- Added create_zip method to ZipFileHandler
- Made all OSW files optional
- Added additional validation steps based on the OSW network properties
1 parent 3d6342d commit 237ea18

File tree

5 files changed

+3519
-411
lines changed

5 files changed

+3519
-411
lines changed

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
jsonschema==4.19.1
22
zipfile36==0.1.3
3-
coverage==7.2.7
3+
coverage==7.2.7
4+
geopandas==0.13.2

src/python_osw_validation/__init__.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,38 @@
22
import json
33
import jsonschema
44
from typing import Dict, Any, Optional, List
5+
import geopandas as gpd
56
from .zipfile_handler import ZipFileHandler
67
from .extracted_data_validator import ExtractedDataValidator
78

89
SCHEMA_PATH = os.path.join(os.path.dirname(__file__), 'schema')
910

11+
OSW_dataset_files = {"edges": {
12+
"required": False,
13+
"geometry": "LineString"
14+
},
15+
"nodes": {
16+
"required": False,
17+
"geometry": "Point"
18+
},
19+
"points": {
20+
"required": False,
21+
"geometry": "Point"
22+
},
23+
"lines": {
24+
"required": False,
25+
"geometry": "LineString"
26+
},
27+
"zones": {
28+
"required": False,
29+
"geometry": "Polygon"
30+
},
31+
"polygons": {
32+
"required": False,
33+
"geometry": "Polygon"
34+
}
35+
}
36+
1037

1138
class ValidationResult:
1239
def __init__(self, is_valid: bool, errors: Optional[List[str]] = None):
@@ -34,6 +61,14 @@ def load_osw_schema(self, schema_path: str) -> Dict[str, Any]:
3461
except Exception as e:
3562
self.errors.append(f'Invalid or missing schema file: {e}')
3663
raise Exception(f'Invalid or missing schema file: {e}')
64+
65+
def unique_id(self, gdf):
66+
"""Check for duplicate values in the _id field"""
67+
duplicates = gdf[gdf.duplicated('_id', keep=False)]['_id'].unique()
68+
69+
is_valid = len(duplicates) == 0
70+
71+
return is_valid, list(duplicates)
3772

3873
def validate(self) -> ValidationResult:
3974
try:
@@ -58,6 +93,72 @@ def validate(self) -> ValidationResult:
5893
zip_handler.remove_extracted_files()
5994
return ValidationResult(False, self.errors)
6095

96+
# Validate data integrity
97+
OSW_dataset = {}
98+
for file in validator.files:
99+
file_path = os.path.join(file)
100+
osw_file = file_path.split('.')[-2]
101+
OSW_dataset[osw_file] = gpd.read_file(file_path)
102+
103+
# Are all ids unique within each file? Uniqueness across files is not checked because there is no global OSW ID format yet
104+
for osw_file in OSW_dataset:
105+
is_valid, duplicates = self.unique_id(OSW_dataset[osw_file])
106+
if not is_valid:
107+
zip_handler.remove_extracted_files()
108+
self.errors.append(f"Duplicate _id's found in {osw_file} : {duplicates}")
109+
return ValidationResult(False, self.errors)
110+
111+
# Create sets of node ids and foreign keys to be used in validation
112+
if "nodes" in OSW_dataset:
113+
node_ids = set(OSW_dataset['nodes']['_id'])
114+
else:
115+
node_ids = set()
116+
117+
if "edges" in OSW_dataset:
118+
node_ids_edges_u = set(OSW_dataset['edges']['_u_id'])
119+
node_ids_edges_v = set(OSW_dataset['edges']['_v_id'])
120+
else:
121+
node_ids_edges_u = set()
122+
node_ids_edges_v = set()
123+
124+
if "zones" in OSW_dataset:
125+
node_ids_zones_w = set([item for sublist in OSW_dataset['zones']['_w_id'] for item in sublist])
126+
else:
127+
node_ids_zones_w = set()
128+
129+
# Do all node references in _u_id exist in nodes?
130+
unmatched = node_ids_edges_u - node_ids
131+
is_valid = len(unmatched) == 0
132+
if not is_valid:
133+
zip_handler.remove_extracted_files()
134+
self.errors.append(f"Foreign key constraints for edge start nodes failed, _u_id's of unmatched nodes: {unmatched}")
135+
return ValidationResult(False, self.errors)
136+
137+
# Do all node references in _v_id exist in nodes?
138+
unmatched = node_ids_edges_v - node_ids
139+
is_valid = len(unmatched) == 0
140+
if not is_valid:
141+
zip_handler.remove_extracted_files()
142+
self.errors.append(f"Foreign key constraints for edge end nodes failed, _v_id's of unmatched nodes: {unmatched}")
143+
return ValidationResult(False, self.errors)
144+
145+
# Do all node references in _w_id exist in nodes?
146+
unmatched = node_ids_zones_w - node_ids
147+
is_valid = len(unmatched) == 0
148+
if not is_valid:
149+
zip_handler.remove_extracted_files()
150+
self.errors.append(f"Foreign key constraints for zone nodes failed, _w_id's of unmatched nodes: {unmatched}")
151+
return ValidationResult(False, self.errors)
152+
153+
# Geometry validation: check the geometry type in each file and test whether the coordinates form a valid geometric shape according to the Simple Feature Access standard
154+
for osw_file in OSW_dataset:
155+
invalid_geojson = OSW_dataset[osw_file][(OSW_dataset[osw_file].geometry.type != OSW_dataset_files[osw_file]['geometry']) | (OSW_dataset[osw_file].is_valid==False)]
156+
is_valid = len(invalid_geojson) == 0
157+
if not is_valid:
158+
zip_handler.remove_extracted_files()
159+
self.errors.append(f"Invalid {osw_file} geometries found, id's of invalid geometries: {set(invalid_geojson['_id'])}")
160+
return ValidationResult(False, self.errors)
161+
61162
return ValidationResult(True)
62163
except Exception as e:
63164
self.errors.append(f'Unable to validate: {e}')

src/python_osw_validation/extracted_data_validator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ def is_valid(self) -> bool:
2525
self.error = 'No .geojson files found in the specified directory or its subdirectories.'
2626
return False
2727

28-
required_files = {'nodes', 'edges'}
29-
optional_files = {'points'}
28+
required_files = {}
29+
optional_files = {'nodes', 'edges', 'points', 'lines', 'zones', 'polygons'}
3030
for filename in geojson_files:
3131
base_name = os.path.basename(filename)
3232
for required_file in required_files:

0 commit comments

Comments
 (0)