Skip to content

Commit 2865098

Browse files
committed
Fix for Task 1078
- Fix for Task [1078](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/1078) - Added the functionality to stop processing further once 20 errors (the default) have been collected - max_errors can be passed as a parameter to the validate function - Added unit test cases - Updated README.md - Updated CHANGELOG.md - Updated package version
1 parent cf610ff commit 2865098

File tree

5 files changed

+54
-16
lines changed

5 files changed

+54
-16
lines changed

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Change log
22

3+
### 0.2.2
4+
- Added functionality to get a specific number of errors
5+
```
6+
validator = OSWValidation(zipfile_path=<ZIP_FILE_PATH>)
7+
result = validator.validate()  # will return only the first 20 errors by default
8+
result = validator.validate(max_errors=10)  # will return only the first 10 errors
9+
```
10+
311
### 0.2.1
412
- Updated zipfile_handler
513
- Fixed "No .geojson files found in the specified directory or its subdirectories." issue

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,13 @@ This package validates the OSW geojson file. Package requires a OSW zip file pat
2828
from python_osw_validation import OSWValidation
2929

3030
validator = OSWValidation(zipfile_path='<Zip file path>')
31-
result = validator.validate()
31+
result = validator.validate()
3232
print(result.is_valid)
33-
print(result.errors)
33+
print(result.errors) # will return first 20 errors by default if there are errors
34+
35+
result = validator.validate(max_errors=10)
36+
print(result.is_valid)
37+
print(result.errors) # will return first 10 errors depending on the max_errors parameter
3438

3539
```
3640

src/python_osw_validation/__init__.py

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def are_ids_unique(self, gdf):
4545

4646
return is_valid, list(duplicates)
4747

48-
def validate(self) -> ValidationResult:
48+
def validate(self, max_errors=20) -> ValidationResult:
4949
try:
5050
# Extract the zipfile
5151
zip_handler = ZipFileHandler(self.zipfile_path)
@@ -62,7 +62,8 @@ def validate(self) -> ValidationResult:
6262
return ValidationResult(False, self.errors)
6363
for file in validator.files:
6464
file_path = os.path.join(file)
65-
is_valid = self.validate_osw_errors(file_path)
65+
if not self.validate_osw_errors(file_path, max_errors):
66+
break
6667

6768
if self.errors:
6869
zip_handler.remove_extracted_files()
@@ -72,7 +73,8 @@ def validate(self) -> ValidationResult:
7273
OSW_dataset = {}
7374
for file in validator.files:
7475
file_path = os.path.join(file)
75-
osw_file = next((osw_file_any for osw_file_any in OSW_dataset_files.keys() if osw_file_any in file_path), '')
76+
osw_file = next(
77+
(osw_file_any for osw_file_any in OSW_dataset_files.keys() if osw_file_any in file_path), '')
7678
OSW_dataset[osw_file] = gpd.read_file(file_path)
7779

7880
# Are all id's unique in each file? No need to check uniqueness across files yet since we do not have a global OSW ID format yet
@@ -103,35 +105,42 @@ def validate(self) -> ValidationResult:
103105
unmatched = node_ids_edges_u - node_ids
104106
is_valid = len(unmatched) == 0
105107
if not is_valid:
106-
self.errors.append(f"All _u_id's in edges should be part of _id's mentioned in nodes, _u_id's not in nodes are: {unmatched}")
108+
self.errors.append(
109+
f"All _u_id's in edges should be part of _id's mentioned in nodes, _u_id's not in nodes are: {unmatched}")
107110

108111
# Do all node references in _v_id exist in nodes?
109112
unmatched = node_ids_edges_v - node_ids
110113
is_valid = len(unmatched) == 0
111114
if not is_valid:
112-
self.errors.append(f"All _v_id's in edges should be part of _id's mentioned in nodes, _v_id's not in nodes are: {unmatched}")
115+
self.errors.append(
116+
f"All _v_id's in edges should be part of _id's mentioned in nodes, _v_id's not in nodes are: {unmatched}")
113117

114118
# Do all node references in _w_id exist in nodes?
115119
unmatched = node_ids_zones_w - node_ids
116120
is_valid = len(unmatched) == 0
117121
if not is_valid:
118-
self.errors.append(f"All _w_id's in zones should be part of _id's mentioned in nodes, _w_id's not in nodes are: {unmatched}")
122+
self.errors.append(
123+
f"All _w_id's in zones should be part of _id's mentioned in nodes, _w_id's not in nodes are: {unmatched}")
119124

120125
# Geometry validation: check geometry type in each file and test if coordinates make a shape that is reasonable geometric shape according to the Simple Feature Access standard
121126
for osw_file in OSW_dataset:
122-
invalid_geojson = OSW_dataset[osw_file][(OSW_dataset[osw_file].geometry.type != OSW_dataset_files[osw_file]['geometry']) | (OSW_dataset[osw_file].is_valid==False)]
127+
invalid_geojson = OSW_dataset[osw_file][
128+
(OSW_dataset[osw_file].geometry.type != OSW_dataset_files[osw_file]['geometry']) | (
129+
OSW_dataset[osw_file].is_valid == False)]
123130
is_valid = len(invalid_geojson) == 0
124131
if not is_valid:
125-
self.errors.append(f"Invalid {osw_file} geometries found, id's of invalid geometries: {set(invalid_geojson['_id'])}")
132+
self.errors.append(
133+
f"Invalid {osw_file} geometries found, id's of invalid geometries: {set(invalid_geojson['_id'])}")
126134

127135
# Validate OSW external extensions
128136
for file in validator.externalExtensions:
129137
file_path = os.path.join(file)
130138
extensionFile = gpd.read_file(file_path)
131-
invalid_geojson = extensionFile[extensionFile.is_valid==False]
139+
invalid_geojson = extensionFile[extensionFile.is_valid == False]
132140
is_valid = len(invalid_geojson) == 0
133141
if not is_valid:
134-
self.errors.append(f"Invalid geometries found in extension file {file}, list of invalid geometries: {invalid_geojson.to_json()}")
142+
self.errors.append(
143+
f"Invalid geometries found in extension file {file}, list of invalid geometries: {invalid_geojson.to_json()}")
135144

136145
if self.errors:
137146
zip_handler.remove_extracted_files()
@@ -147,14 +156,17 @@ def load_osw_file(self, graph_geojson_path: str) -> Dict[str, Any]:
147156
with open(graph_geojson_path, 'r') as file:
148157
return json.load(file)
149158

150-
def validate_osw_errors(self, file_path: str) -> bool:
159+
def validate_osw_errors(self, file_path: str, max_errors: int) -> bool:
151160
'''Validate OSW Data against the schema and process all errors'''
152161
geojson_data = self.load_osw_file(file_path)
153162
validator = jsonschema.Draft7Validator(self.load_osw_schema(self.schema_file_path))
154163
errors = list(validator.iter_errors(geojson_data))
155164

156165
if errors:
157-
for error in errors:
158-
self.errors.append(f'Validation error: {error.message}')
166+
for index, error in enumerate(errors):
167+
if index < max_errors:
168+
self.errors.append(f'Validation error: {error.message}')
169+
if len(self.errors) == max_errors:
170+
break
159171
return False
160172
return True
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.2.1'
1+
__version__ = '0.2.2'

tests/unit_tests/test_osw_validation.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,20 @@ def test_invalid_zipfile_with_schema(self):
7676
self.assertFalse(result.is_valid)
7777
self.assertIsNotNone(result.errors)
7878

79+
def test_invalid_zipfile_default_error_count(self):
80+
validation = OSWValidation(zipfile_path=self.invalid_zipfile)
81+
result = validation.validate()
82+
self.assertFalse(result.is_valid)
83+
self.assertIsNotNone(result.errors)
84+
self.assertEqual(20, len(result.errors))
85+
86+
def test_invalid_zipfile_should_return_10_errors(self):
87+
validation = OSWValidation(zipfile_path=self.invalid_zipfile)
88+
result = validation.validate(max_errors=10)
89+
self.assertFalse(result.is_valid)
90+
self.assertIsNotNone(result.errors)
91+
self.assertEqual(10, len(result.errors))
92+
7993
def test_invalid_zipfile_with_invalid_schema(self):
8094
validation = OSWValidation(zipfile_path=self.invalid_zipfile,
8195
schema_file_path=self.invalid_schema_file_path)

0 commit comments

Comments
 (0)