diff --git a/CHANGELOG.md b/CHANGELOG.md index 7072305..369f9d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,31 @@ # Change log +### 0.2.12 + +#### Added +- Per-geometry schema support: auto-picks Point/LineString/Polygon schemas with sensible defaults. +- Structured per-feature **issues** output (former “fixme”): one best, human-friendly message per feature. +- Friendly error formatter: + - Compacts `Enum` errors. + - Summarizes `anyOf` by unioning required keys → “must include one of: …”. +- `_feature_index_from_error()` to reliably extract `feature_index` from `jsonschema_rs` error paths. +- `_get_colset()` utility for safe set extraction with diagnostics for missing columns. +- Unit tests covering helpers, schema selection, and issues aggregation. + +#### Changed +- `validate()` now **streams** `jsonschema_rs` errors; legacy `errors` list remains but is capped by `max_errors`. +- `ValidationResult` now includes `issues`. +- Schema selection prefers geometry from the first feature; falls back to filename heuristics (`nodes/points`, `edges/lines`, `zones/polygons`). + +#### Fixed +- Robust GeoJSON/extension handling: + - Safe fallback to index when `_id` is missing. + - Non-serializable property detection in extensions (with clear messages). +- Safer flattening of `_w_id` (list-like) for zone validations. + +#### Migration Notes +- Prefer consuming `ValidationResult.issues` for per-feature UX and tooling. 
class ValidationResult:
    """Container for a validation outcome.

    Attributes:
        is_valid: Overall verdict, stored exactly as passed in.
        errors: High-level, human-readable strings (legacy behavior).
            Normalized to ``None`` when no errors were supplied, i.e. for
            both ``None`` and an empty list.
        issues: Per-feature schema problems (former ``fixme``), stored as
            passed in. Each item is a dict with the keys ``'filename'``,
            ``'feature_index'`` and ``'error_message'``.
    """

    def __init__(self, is_valid: bool, errors: Optional[List[str]] = None,
                 issues: Optional[List[Dict[str, Any]]] = None):
        self.is_valid = is_valid
        # `errors` defaults to None, so test truthiness instead of calling
        # len() on it; an empty list still collapses to None as before.
        self.errors = errors if errors else None
        self.issues = issues
Optional[int] = None): + """Helper to log errors in a consistent format.""" + self.errors.append(message) + self.issues.append({ + 'filename': filename, + 'feature_index': feature_index, + 'error_message': message, + }) + + # add this small helper inside OSWValidation (near other helpers) + def _get_colset(self, gdf: Optional[gpd.GeoDataFrame], col: str, filekey: str) -> set: + """Return set of a column if present; else log and return empty set.""" + if gdf is None: + return set() + if col not in gdf.columns: + self.log_errors(f"Missing required column '{col}' in {filekey}.", filekey, None) + return set() + try: + return set(gdf[col].dropna()) + except Exception: + # If non-hashable entries sneak in, coerce to str to keep moving + try: + return set(map(str, gdf[col].dropna())) + except Exception: + self.log_errors(f"Could not create set for column '{col}' in {filekey}.", filekey, None) + return set() + + # ---------------------------- + # Schema selection + # ---------------------------- def load_osw_schema(self, schema_path: str) -> Dict[str, Any]: """Load OSW Schema""" @@ -36,7 +102,11 @@ def load_osw_schema(self, schema_path: str) -> Dict[str, Any]: with open(schema_path, 'r') as file: return json.load(file) except Exception as e: - self.errors.append(f'Invalid or missing schema file: {e}') + self.log_errors( + message=f'Invalid or missing schema file: {e}', + filename=schema_path, + feature_index=None + ) raise Exception(f'Invalid or missing schema file: {e}') def are_ids_unique(self, gdf): @@ -45,9 +115,36 @@ def are_ids_unique(self, gdf): is_valid = len(duplicates) == 0 return is_valid, list(duplicates) + def pick_schema_for_file(self, file_path: str, geojson_data: Dict[str, Any]) -> str: + if self.schema_file_path: + return self.schema_file_path + try: + features = geojson_data.get('features', []) + if features: + gtype = (features[0].get('geometry') or {}).get('type') + if gtype == 'Point': + return self.point_schema_path + if gtype == 'LineString': + 
return self.line_schema_path + if gtype == 'Polygon': + return self.polygon_schema_path + except Exception: + pass + lower = os.path.basename(file_path).lower() + if 'node' in lower or 'point' in lower: + return self.point_schema_path + if 'edge' in lower or 'line' in lower: + return self.line_schema_path + if 'zone' in lower or 'polygon' in lower or 'area' in lower: + return self.polygon_schema_path + return self.line_schema_path + + # ---------------------------- + # Core validation entrypoint + # ---------------------------- def validate(self, max_errors=20) -> ValidationResult: zip_handler = None - OSW_DATASET = {} + OSW_DATASET: Dict[str, Optional[gpd.GeoDataFrame]] = {} validator = None try: # Extract the zipfile @@ -55,147 +152,224 @@ def validate(self, max_errors=20) -> ValidationResult: self.extracted_dir = zip_handler.extract_zip() if not self.extracted_dir: - self.errors.append(zip_handler.error) - return ValidationResult(False, self.errors) + self.log_errors( + message=zip_handler.error, + filename=self.zipfile_path, + feature_index=None + ) + return ValidationResult(False, self.errors, self.issues) # Validate the folder structure validator = ExtractedDataValidator(self.extracted_dir) if not validator.is_valid(): - self.errors.append(validator.error) - return ValidationResult(False, self.errors) + self.log_errors( + message=validator.error, + filename=self.extracted_dir, + feature_index=None + ) + return ValidationResult(False, self.errors, self.issues) + # Per-file schema validation → populate self.issues (fixme-like) for file in validator.files: file_path = os.path.join(file) if not self.validate_osw_errors(file_path=str(file_path), max_errors=max_errors): + # mirror legacy behavior: stop early when we hit the cap break if self.errors: - return ValidationResult(False, self.errors) + return ValidationResult(False, self.errors, self.issues) - # Validate data integrity + # Load GeoDataFrames for integrity checks for file in validator.files: file_path = 
os.path.join(file) - osw_file = next( - (osw_file_any for osw_file_any in OSW_DATASET_FILES.keys() if osw_file_any in file_path), '') - OSW_DATASET[osw_file] = gpd.read_file(file_path) + osw_file = next((osw_key for osw_key in OSW_DATASET_FILES.keys() + if osw_key in os.path.basename(file_path)), '') + try: + gdf = gpd.read_file(file_path) + except Exception as e: + self.log_errors( + message=f"Failed to read '{os.path.basename(file_path)}' as GeoJSON: {e}", + filename=os.path.basename(file_path), + feature_index=None + ) + gdf = None + if osw_file: + OSW_DATASET[osw_file] = gdf - # Are all id's unique in each file? No need to check uniqueness across files yet since we do not have a global OSW ID format yet - for osw_file in OSW_DATASET: - is_valid, duplicates = self.are_ids_unique(OSW_DATASET[osw_file]) + # Are all id's unique in each file? + for osw_file, gdf in OSW_DATASET.items(): + if gdf is None: + continue + is_valid, duplicates = self.are_ids_unique(gdf) if not is_valid: - self.errors.append(f"Duplicate _id's found in {osw_file} : {duplicates}") + self.log_errors( + message=f"Duplicate _id's found in {osw_file} : {duplicates}", + filename=osw_file, + feature_index=None + ) # Create sets of node id's and foreign keys to be used in validation - if 'nodes' in OSW_DATASET: - node_ids = set(OSW_DATASET['nodes']['_id']) - else: - node_ids = set() + nodes_df = OSW_DATASET.get('nodes') + edges_df = OSW_DATASET.get('edges') + zones_df = OSW_DATASET.get('zones') - if 'edges' in OSW_DATASET: - node_ids_edges_u = set(OSW_DATASET['edges']['_u_id']) - node_ids_edges_v = set(OSW_DATASET['edges']['_v_id']) - else: - node_ids_edges_u = set() - node_ids_edges_v = set() + node_ids = self._get_colset(nodes_df, '_id', 'nodes') if nodes_df is not None else set() + node_ids_edges_u = self._get_colset(edges_df, '_u_id', 'edges') if edges_df is not None else set() + node_ids_edges_v = self._get_colset(edges_df, '_v_id', 'edges') if edges_df is not None else set() - if 'zones' in 
OSW_DATASET: - node_ids_zones_w = set([item for sublist in OSW_DATASET['zones']['_w_id'] for item in sublist]) + # zones: _w_id is list-like per feature → flatten safely + if zones_df is not None: + if '_w_id' in zones_df.columns: + vals = zones_df['_w_id'].dropna().tolist() + node_ids_zones_w = set( + item + for sub in vals + for item in (sub if isinstance(sub, (list, tuple)) else [sub]) + ) + else: + self.log_errors("Missing required column '_w_id' in zones.", 'zones', None) + node_ids_zones_w = set() else: node_ids_zones_w = set() - # Do all node references in _u_id exist in nodes? - unmatched = node_ids_edges_u - node_ids - is_valid = len(unmatched) == 0 - if not is_valid: - unmatched_list = list(unmatched) - num_unmatched = len(unmatched_list) - limit = min(num_unmatched, 20) - displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) - self.errors.append( - f"All _u_id's in edges should be part of _id's mentioned in nodes. " - f"Showing {'20' if num_unmatched > 20 else 'all'} out of {len(unmatched)} unmatched _u_id's: {displayed_unmatched}" - ) + # Cross-file integrity checks (only when we have the prerequisite sets) + if node_ids and node_ids_edges_u: + unmatched = node_ids_edges_u - node_ids + if unmatched: + unmatched_list = list(unmatched) + num_unmatched = len(unmatched_list) + limit = min(num_unmatched, 20) + displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) + self.log_errors( + message=(f"All _u_id's in edges should be part of _id's mentioned in nodes. " + f"Showing {'20' if num_unmatched > 20 else 'all'} out of {num_unmatched} " + f"unmatched _u_id's: {displayed_unmatched}"), + filename='All', + feature_index=None + ) - # Do all node references in _v_id exist in nodes? 
- unmatched = node_ids_edges_v - node_ids - is_valid = len(unmatched) == 0 - if not is_valid: - unmatched_list = list(unmatched) - num_unmatched = len(unmatched_list) - limit = min(num_unmatched, 20) - displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) - self.errors.append( - f"All _v_id's in edges should be part of _id's mentioned in nodes. " - f"Showing {'20' if num_unmatched > 20 else 'all'} out of {len(unmatched)} unmatched _v_id's: {displayed_unmatched}" - ) + if node_ids and node_ids_edges_v: + unmatched = node_ids_edges_v - node_ids + if unmatched: + unmatched_list = list(unmatched) + num_unmatched = len(unmatched_list) + limit = min(num_unmatched, 20) + displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) + self.log_errors( + message=(f"All _v_id's in edges should be part of _id's mentioned in nodes. " + f"Showing {'20' if num_unmatched > 20 else 'all'} out of {num_unmatched} " + f"unmatched _v_id's: {displayed_unmatched}"), + filename='All', + feature_index=None + ) - # Do all node references in _w_id exist in nodes? - unmatched = node_ids_zones_w - node_ids - is_valid = len(unmatched) == 0 - if not is_valid: - unmatched_list = list(unmatched) - num_unmatched = len(unmatched_list) - limit = min(num_unmatched, 20) - displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) - self.errors.append( - f"All _w_id's in zones should be part of _id's mentioned in nodes. " - f"Showing {'20' if num_unmatched > 20 else 'all'} out of {len(unmatched)} unmatched _w_id's: {displayed_unmatched}" - ) + if node_ids and node_ids_zones_w: + unmatched = node_ids_zones_w - node_ids + if unmatched: + unmatched_list = list(unmatched) + num_unmatched = len(unmatched_list) + limit = min(num_unmatched, 20) + displayed_unmatched = ', '.join(map(str, unmatched_list[:limit])) + self.log_errors( + message=(f"All _w_id's in zones should be part of _id's mentioned in nodes. 
" + f"Showing {'20' if num_unmatched > 20 else 'all'} out of {num_unmatched} " + f"unmatched _w_id's: {displayed_unmatched}"), + filename='All', + feature_index=None + ) - # Geometry validation: check geometry type in each file and test if coordinates make a shape that is reasonable geometric shape according to the Simple Feature Access standard - for osw_file in OSW_DATASET: - invalid_geojson = OSW_DATASET[osw_file][ - (OSW_DATASET[osw_file].geometry.type != OSW_DATASET_FILES[osw_file]['geometry']) | ( - OSW_DATASET[osw_file].is_valid == False)] - is_valid = len(invalid_geojson) == 0 - if not is_valid: - invalid_ids = list(set(invalid_geojson['_id'])) + # Geometry validation: check geometry type and SFA validity + for osw_file, gdf in OSW_DATASET.items(): + if gdf is None: + continue + expected_geom = OSW_DATASET_FILES.get(osw_file, {}).get('geometry') + if expected_geom: + invalid_geojson = gdf[ + (gdf.geometry.type != expected_geom) | (gdf.is_valid == False) + ] + else: + invalid_geojson = gdf[gdf.is_valid == False] + + if len(invalid_geojson) > 0: + # Extract IDs if present, else fallback to index + ids_series = invalid_geojson['_id'] if '_id' in invalid_geojson.columns else invalid_geojson.index + invalid_ids = list(set(ids_series)) num_invalid = len(invalid_ids) limit = min(num_invalid, 20) - displayed_invalid = ', '.join(map(str, invalid_ids[:min(num_invalid, limit)])) - self.errors.append( - f"Showing {'20' if num_invalid > 20 else 'all'} out of {num_invalid} invalid {osw_file} geometries, " - f"id's of invalid geometries: {displayed_invalid}" - ) + displayed_invalid = ', '.join(map(str, invalid_ids[:limit])) + self.log_errors( + message=(f"Showing {'20' if num_invalid > 20 else 'all'} out of {num_invalid} " + f"invalid {osw_file} geometries, id's of invalid geometries: {displayed_invalid}"), + filename='All', + feature_index=None + ) # Validate OSW external extensions for file in validator.externalExtensions: file_path = os.path.join(file) file_name = 
os.path.basename(file) - extensionFile = gpd.read_file(file_path) + try: + extensionFile = gpd.read_file(file_path) + except Exception as e: + self.log_errors( + message=f"Failed to read extension '{file_name}' as GeoJSON: {e}", + filename=file_name, + feature_index=None + ) + continue + invalid_geojson = extensionFile[extensionFile.is_valid == False] - is_valid = len(invalid_geojson) == 0 - if not is_valid: + if len(invalid_geojson) > 0: try: - # Safely extract invalid _id or fallback to index if _id is missing invalid_ids = list(set(invalid_geojson.get('_id', invalid_geojson.index))) num_invalid = len(invalid_ids) limit = min(num_invalid, 20) displayed_invalid = ', '.join(map(str, invalid_ids[:limit])) - self.errors.append( - f"Invalid geometries found in extension file `{file_name}`. Showing {limit if num_invalid > 20 else 'all'} of {num_invalid} invalid geometry IDs: {displayed_invalid}" + self.log_errors( + message=(f"Invalid geometries found in extension file `{file_name}`. " + f"Showing {limit if num_invalid > 20 else 'all'} of {num_invalid} " + f"invalid geometry IDs: {displayed_invalid}"), + filename=file_name, + feature_index=None ) except Exception as e: - self.errors.append(f"Invalid features found in `{file_name}`, but failed to extract IDs: {e}") + self.log_errors( + message=f"Invalid features found in `{file_name}`, but failed to extract IDs: {e}", + filename=file_name, + feature_index=None + ) # Optional: Test serializability of extension file try: - for idx, row in extensionFile.drop(columns='geometry').iterrows(): + for _, row in extensionFile.drop(columns='geometry').iterrows(): json.dumps(row.to_dict()) except Exception as e: - self.errors.append(f"Extension file `{file_name}` has non-serializable properties: {e}") + self.log_errors( + message=f"Extension file `{file_name}` has non-serializable properties: {e}", + filename=file_name, + feature_index=None + ) break if self.errors: - return ValidationResult(False, self.errors) + return 
ValidationResult(False, self.errors, self.issues) else: - return ValidationResult(True) + return ValidationResult(True, [], self.issues) + except Exception as e: - self.errors.append(f'Unable to validate: {e}') + self.log_errors( + message=f'Unable to validate: {e}', + filename=None, + feature_index=None + ) traceback.print_exc() - return ValidationResult(False, self.errors) + return ValidationResult(False, self.errors, self.issues) finally: - del OSW_DATASET + # Cleanup extracted files + try: + del OSW_DATASET + except Exception: + pass if zip_handler: zip_handler.remove_extracted_files() @@ -204,25 +378,72 @@ def validate(self, max_errors=20) -> ValidationResult: # Additional memory cleanup for geopandas dataframes if validator: - for osw_file in validator.files: - if osw_file in locals(): - del osw_file + try: + for osw_file in getattr(validator, 'files', []): + if osw_file in locals(): + del osw_file + except Exception: + pass del validator gc.collect() def load_osw_file(self, graph_geojson_path: str) -> Dict[str, Any]: - """Load OSW Data""" with open(graph_geojson_path, 'r') as file: return json.load(file) def validate_osw_errors(self, file_path: str, max_errors: int) -> bool: - """Validate OSW Data against the schema and process all errors""" + """Validate one OSW GeoJSON against the appropriate schema (streaming). + + - Keeps legacy `self.errors` capped by `max_errors` (original behavior). + - While streaming, tracks the *best* error per feature (ranked) and, + before returning, pushes a single human-friendly message per feature + into `self.issues` (like your sample: "must include one of: ..."). 
+ """ geojson_data = self.load_osw_file(file_path) - validator = jsonschema_rs.Draft7Validator(self.load_osw_schema(self.schema_file_path)) + schema_path = self.pick_schema_for_file(file_path, geojson_data) + schema = self.load_osw_schema(schema_path) + validator = jsonschema_rs.Draft7Validator(schema) + + filename = os.path.basename(file_path) + + # Per-feature best error accumulator (streaming) + # feature_idx -> (rank_tuple, error_obj) + best_by_feature: Dict[Optional[int], Tuple[tuple, Any]] = {} + feature_order: List[Optional[int]] = [] # preserve first-seen order + + # Legacy cap + legacy_count = 0 + + # --- STREAM over errors; STOP as soon as legacy hits the cap --- + for err in validator.iter_errors(geojson_data): + # legacy list (for backward compatibility) + if legacy_count < max_errors: + self.errors.append(f'Validation error: {getattr(err, "message", "")}') + legacy_count += 1 + else: + # We've reached the legacy cap; stop work to match original performance + break + + # Track the best error per feature + fidx = _feature_index_from_error(err) + r = _rank_for(err) + prev = best_by_feature.get(fidx) + if prev is None: + best_by_feature[fidx] = (r, err) + feature_order.append(fidx) + else: + if r < prev[0]: + best_by_feature[fidx] = (r, err) - for error in validator.iter_errors(geojson_data): - self.errors.append(f'Validation error: {error.message}') - if len(self.errors) >= max_errors: - return False + # Build per-feature issues (one concise message per feature) in first-seen order + for fidx in feature_order: + _, best_err = best_by_feature[fidx] + pretty = _pretty_message(best_err, schema) + self.issues.append({ + "filename": filename, + "feature_index": fidx if fidx is not None else -1, + "error_message": [pretty], + }) + # Mirror original boolean behavior: False when we exactly hit the cap return len(self.errors) < max_errors diff --git a/src/python_osw_validation/helpers.py b/src/python_osw_validation/helpers.py new file mode 100644 index 
"""Helpers for turning jsonschema_rs validation errors into per-feature messages."""
import re
from typing import Optional

# Tail of the form " or N other candidates" that is stripped from enum messages.
_OTHER_CANDIDATES_RE = re.compile(r"\s*or\s+\d+\s+other candidates")

# Rank bucket per error kind; kinds not listed here fall into bucket 3.
_KIND_ORDER = {
    "Enum": 0,
    "Type": 1,
    "Required": 1,
    "Const": 1,
    "Pattern": 2,
    "Minimum": 2,
    "Maximum": 2,
}


def _first_line(text: str) -> str:
    """Return the text up to (not including) the first newline."""
    return text.split("\n")[0]


def _feature_index_from_error(err) -> Optional[int]:
    """
    Return the index after 'features' in the instance path, else None.

    Reads ``err.instance_path``; a missing or ``None`` path yields None.
    Only a genuine integer segment directly following ``"features"`` counts
    (booleans are rejected even though ``bool`` subclasses ``int``).
    """
    path = list(getattr(err, "instance_path", None) or [])
    for seg, nxt in zip(path, path[1:]):
        if seg == "features" and isinstance(nxt, int) and not isinstance(nxt, bool):
            return nxt
    return None


def _err_kind(err) -> str:
    """
    Best-effort classification of error kind.

    Prefers the class name of ``err.kind`` (text after the last underscore),
    falls back to a capitalised ``err.validator`` string, then to looking for
    ``"anyOf"`` in the message. Returns ``""`` when nothing matches.
    """
    kind_obj = getattr(err, "kind", None)
    if kind_obj is not None:
        return type(kind_obj).__name__.split("_")[-1]  # e.g. 'AnyOf', 'Enum', 'Required'

    validator = getattr(err, "validator", None)
    # Guard against an empty string: indexing validator[0] would raise IndexError.
    if isinstance(validator, str) and validator:
        return validator[0].upper() + validator[1:]  # 'anyOf' -> 'AnyOf'

    message = getattr(err, "message", "") or ""
    return "AnyOf" if "anyOf" in message else ""


def _clean_enum_message(err) -> str:
    """Compact enum error: drop the 'or N other candidates' tail, keep the first line."""
    message = getattr(err, "message", "") or ""
    return _first_line(_OTHER_CANDIDATES_RE.sub("", message))


def _collect_required(node) -> set:
    """Union every 'required' list found under node, following allOf/anyOf/oneOf and lists."""
    required = set()
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, dict):
            names = current.get("required")
            if isinstance(names, list):
                required.update(names)
            for key in ("allOf", "anyOf", "oneOf"):
                branches = current.get(key)
                if isinstance(branches, list):
                    pending.extend(branches)
        elif isinstance(current, list):
            pending.extend(current)
    return required


def _pretty_message(err, schema) -> str:
    """
    Convert a jsonschema_rs error to a concise, user-friendly string.

    Special handling:
      - Enum  -> compact message
      - AnyOf -> follow ``err.schema_path`` into ``schema`` and summarize the
        union of 'required' fields across branches as
        "must include one of: <names>"

    Anything else (or an AnyOf whose schema node cannot be resolved or has no
    'required' keys) yields the first line of the library message.
    """
    kind = _err_kind(err)

    if kind == "Enum":
        return _clean_enum_message(err)

    if kind == "AnyOf":
        try:
            node = schema
            for seg in getattr(err, "schema_path", None) or []:
                node = node[seg]
            required = _collect_required(node)
            if required:
                return f"must include one of: {', '.join(sorted(required))}"
        except (KeyError, IndexError, TypeError):
            # Best effort only: an unresolvable path or odd schema content
            # falls through to the plain library message below.
            pass

    # Default: first line from library message
    return _first_line(getattr(err, "message", "") or "")


def _rank_for(err) -> tuple:
    """
    Ranking for 'best' error per feature.

    Returns ``(bucket, message_length)``; smaller tuples are preferred.
    Buckets: Enum (0) > Type/Required/Const (1) > Pattern/Minimum/Maximum (2)
    > everything else (3).
    """
    bucket = _KIND_ORDER.get(_err_kind(err), 3)
    return (bucket, len(getattr(err, "message", "") or ""))
array", + "type": "array", + "minItems": 1, + "additionalItems": false, + "items": { + "title": "FeatureObject", + "type": "object", + "required": [ + "type", + "geometry", + "properties" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "FeatureType", + "type": "string", + "default": "Feature", + "enum": [ + "Feature" + ] + }, + "geometry": { + "title": "geometryObject", + "type": "object", + "required": [ + "type", + "coordinates" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "GeometryType", + "type": "string", + "default": "LineString", + "enum": [ + "LineString" + ] + }, + "coordinates": { + "title": "coordinates", + "type": "array", + "minItems": 2, + "items": { + "type": "array", + "additionalItems": false, + "items": [ + { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + { + "type": "number", + "minimum": -90, + "maximum": 90 + } + ] + } + } + } + }, + "properties": { + "title": "propertiesObject", + "type": "object", + "additionalProperties": false, + "properties": { + "_id": { + "minLength": 1, + "type": "string" + }, + "_u_id": { + "minLength": 1, + "type": "string" + }, + "_v_id": { + "minLength": 1, + "type": "string" + }, + "description": { + "description": "A free form text field for describing an entity. May be pre-encoded in relevant pedestrian paths to assist with routing instructing or investigation of map features. 
For example, a description of the sidewalk in relation to a nearby street may be a useful textual description, such as \"NE of Main St.\" Can also be considered a flexible location to embed arbitrary information for specific use cases.", + "type": "string" + }, + "foot": { + "description": "A field that indicates whether an edge can be used by pedestrians.", + "enum": [ + "designated", + "destination", + "no", + "permissive", + "private", + "use_sidepath", + "yes" + ], + "type": "string" + }, + "highway": { + "enum": [ + "service", + "footway", + "living_street", + "pedestrian", + "primary", + "residential", + "secondary", + "steps", + "tertiary", + "trunk", + "unclassified" + ], + "type": "string" + }, + "incline": { + "description": "A field for the estimated incline over a particular path, i.e. slope, i.e. grade, i.e. rise over run. If derived from OpenStreetMap data, this is the maximum incline over the path. If derived from DEM data, it is more likely to be an underestimation. Positive values indicate an uphill climb while negative are downhill. For example, a 45 degree downhill value for incline would be -1.0.", + "maximum": 1, + "minimum": -1, + "type": "number" + }, + "length": { + "description": "A field for the length of an entity in meters. This field is always inferred from the geometry.", + "maximum": 5000, + "minimum": 0, + "type": "number" + }, + "name": { + "description": "A field for a designated name for an entity. 
Example: an official name for a trail.", + "type": "string" + }, + "service": { + "enum": [ + "alley", + "driveway", + "parking_aisle" + ], + "type": "string" + }, + "surface": { + "description": "A field for the surface material of the path.", + "enum": [ + "asphalt", + "concrete", + "dirt", + "grass", + "grass_paver", + "gravel", + "paved", + "paving_stones", + "unpaved" + ], + "type": "string" + }, + "width": { + "description": "A field for width of an entity in meters.", + "maximum": 500, + "minimum": 0, + "type": "number" + }, + "crossing:markings": { + "description": "A field for markings on the ground which are meant to draw attention to the area where pedestrians are to cross the road.", + "enum": [ + "dashes", + "dots", + "ladder", + "ladder:paired", + "ladder:skewed", + "lines", + "lines:paired", + "lines:rainbow", + "no", + "pictograms", + "rainbow", + "skewed", + "surface", + "yes", + "zebra", + "zebra:bicolour", + "zebra:double", + "zebra:paired", + "zebra:rainbow" + ], + "type": "string" + }, + "footway": { + "enum": [ + "crossing", + "sidewalk", + "traffic_island" + ], + "type": "string" + }, + "barrier": { + "enum": [ + "fence" + ], + "type": "string" + }, + "climb": { + "description": "A field for the climb direction of steps. 
You can use \"up\" or \"down\" to indicate the direction of the climb relative to the direction of the edge.", + "enum": [ + "down", + "up" + ], + "type": "string" + }, + "step_count": { + "description": "A field for number of steps in stairs.", + "maximum": 500, + "minimum": 0, + "type": "integer" + } + }, + "required": [ + "_id" + ], + "patternProperties": { + "^ext:": {} + }, + "dependencies": { + "description": { + "anyOf": [ + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + 
"allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "foot": { + "anyOf": [ + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + 
"type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + 
"properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "incline": { + "anyOf": [ + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" 
+ ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, 
+ { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "length": { + "anyOf": [ + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "barrier" + ], + "properties": { + "barrier": { + "type": "string", + "const": "fence" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": 
"parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "surface": { + "anyOf": [ + { + "allOf": [ + { 
+ "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + 
"allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "width": { + "anyOf": [ + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "alley" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": 
"service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "driveway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "living_street" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + }, + { + "required": [ + "service" + ], + "properties": { + "service": { + "type": "string", + "const": "parking_aisle" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "primary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "residential" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "secondary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "service" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "sidewalk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": 
{ + "type": "string", + "const": "tertiary" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "traffic_island" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "trunk" + } + } + } + ] + }, + { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "unclassified" + } + } + } + ] + } + ] + }, + "crossing:markings": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "footway" + } + } + }, + { + "required": [ + "footway" + ], + "properties": { + "footway": { + "type": "string", + "const": "crossing" + } + } + } + ] + }, + "climb": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + }, + "step_count": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "steps" + } + } + } + ] + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/src/python_osw_validation/schema/Point_schema.json b/src/python_osw_validation/schema/Point_schema.json new file mode 100644 index 0000000..e670443 --- /dev/null +++ b/src/python_osw_validation/schema/Point_schema.json @@ -0,0 +1,202 @@ +{ + "title": "root", + "type": "object", + "required": [ + "type", + "features" + ], + "additionalProperties": false, + "properties": { + "$schema": { + "description": "A field for the schema id.", + "enum": [ + "https://sidewalks.washington.edu/opensidewalks/0.2/schema.json" + ], + "type": "string" + }, + "dataSource": { + "additionalProperties": true, + "properties": {}, + "type": "object" + }, + "dataTimestamp": { + 
"format": "date-time", + "type": "string" + }, + "type": { + "title": "Feature Collection", + "type": "string", + "default": "FeatureCollection", + "enum": [ + "FeatureCollection" + ] + }, + "features": { + "title": "features array", + "type": "array", + "minItems": 1, + "additionalItems": false, + "items": { + "title": "FeatureObject", + "type": "object", + "required": [ + "type", + "geometry", + "properties" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "FeatureType", + "type": "string", + "default": "Feature", + "enum": [ + "Feature" + ] + }, + "geometry": { + "title": "geometryObject", + "type": "object", + "required": [ + "type", + "coordinates" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "GeometryType", + "type": "string", + "default": "Point", + "enum": [ + "Point" + ] + }, + "coordinates": { + "type": "array", + "additionalItems": false, + "items": [ + { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + { + "type": "number", + "minimum": -90, + "maximum": 90 + } + ] + } + } + }, + "properties": { + "title": "propertiesObject", + "type": "object", + "additionalProperties": false, + "properties": { + "_id": { + "minLength": 1, + "type": "string" + }, + "amenity": { + "enum": [ + "bench", + "waste_basket" + ], + "type": "string" + }, + "barrier": { + "enum": [ + "bollard", + "kerb" + ], + "type": "string" + }, + "kerb": { + "enum": [ + "lowered", + "flush", + "raised", + "rolled" + ], + "type": "string" + }, + "tactile_paving": { + "description": "A field for whether a curb has a tactile (textured) surface. Tactile paving is a system of textured ground surface indicators found on footpaths, stairs and public transportation platforms to assist pedestrians who are blind or visually impaired. 
A tactile paving area has a surface that is easy to detect using a long cane, typically because it is rougher than the surrounding surface area or has an embossed pattern.", + "enum": [ + "contrasted", + "no", + "primitive", + "yes" + ], + "type": "string" + }, + "emergency": { + "enum": [ + "fire_hydrant" + ], + "type": "string" + }, + "man_made": { + "enum": [ + "manhole" + ], + "type": "string" + }, + "power": { + "enum": [ + "pole" + ], + "type": "string" + }, + "highway": { + "enum": [ + "street_lamp" + ], + "type": "string" + } + }, + "required": [ + "_id" + ], + "patternProperties": { + "^ext:": {} + }, + "dependencies": { + "kerb": { + "allOf": [ + { + "required": [ + "barrier" + ], + "properties": { + "barrier": { + "type": "string", + "const": "kerb" + } + } + } + ] + }, + "tactile_paving": { + "allOf": [ + { + "required": [ + "barrier" + ], + "properties": { + "barrier": { + "type": "string", + "const": "kerb" + } + } + } + ] + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/src/python_osw_validation/schema/Polygon_schema.json b/src/python_osw_validation/schema/Polygon_schema.json new file mode 100644 index 0000000..5ebefee --- /dev/null +++ b/src/python_osw_validation/schema/Polygon_schema.json @@ -0,0 +1,325 @@ +{ + "title": "root", + "type": "object", + "required": [ + "type", + "features" + ], + "additionalProperties": false, + "properties": { + "$schema": { + "description": "A field for the schema id.", + "enum": [ + "https://sidewalks.washington.edu/opensidewalks/0.2/schema.json" + ], + "type": "string" + }, + "dataSource": { + "additionalProperties": true, + "properties": {}, + "type": "object" + }, + "dataTimestamp": { + "format": "date-time", + "type": "string" + }, + "type": { + "title": "Feature Collection", + "type": "string", + "default": "FeatureCollection", + "enum": [ + "FeatureCollection" + ] + }, + "features": { + "title": "features array", + "type": "array", + "minItems": 1, + "additionalItems": false, + "items": 
{ + "title": "FeatureObject", + "type": "object", + "required": [ + "type", + "geometry", + "properties" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "FeatureType", + "type": "string", + "default": "Feature", + "enum": [ + "Feature" + ] + }, + "geometry": { + "title": "geometryObject", + "type": "object", + "required": [ + "type", + "coordinates" + ], + "additionalProperties": false, + "properties": { + "type": { + "title": "GeometryType", + "type": "string", + "default": "Polygon", + "enum": [ + "Polygon" + ] + }, + "coordinates": { + "type": "array", + "minItems": 1, + "items": { + "type": "array", + "minItems": 4, + "items": { + "type": "array", + "additionalItems": false, + "items": [ + { + "type": "number", + "minimum": -180, + "maximum": 180 + }, + { + "type": "number", + "minimum": -90, + "maximum": 90 + } + ] + } + } + } + } + }, + "properties": { + "title": "propertiesObject", + "type": "object", + "additionalProperties": false, + "properties": { + "_id": { + "minLength": 1, + "type": "string" + }, + "building": { + "description": "A field for markings a given object as a building.", + "enum": [ + "allotment_house", + "apartments", + "bakehouse", + "barn", + "barracks", + "beach_hut", + "boathouse", + "bridge", + "bungalow", + "bunker", + "cabin", + "carport", + "castle", + "cathedral", + "chapel", + "church", + "civic", + "college", + "commercial", + "conservatory", + "construction", + "container", + "cowshed", + "detached", + "digester", + "dormitory", + "farm", + "farm_auxiliary", + "fire_station", + "garage", + "garages", + "gatehouse", + "ger", + "government", + "grandstand", + "greenhouse", + "guardhouse", + "hangar", + "hospital", + "hotel", + "house", + "houseboat", + "hut", + "industrial", + "kindergarten", + "kingdom_hall", + "kiosk", + "livestock", + "military", + "monastery", + "mosque", + "museum", + "office", + "outbuilding", + "pagoda", + "parking", + "pavilion", + "presbytery", + "public", + "quonset_hut", 
+ "religious", + "residential", + "retail", + "riding_hall", + "roof", + "ruins", + "school", + "semidetached_house", + "service", + "shed", + "shrine", + "silo", + "slurry_tank", + "sports_centre", + "sports_hall", + "stable", + "stadium", + "static_caravan", + "stilt_house", + "storage_tank", + "sty", + "supermarket", + "synagogue", + "tech_cab", + "temple", + "tent", + "terrace", + "toilets", + "tower", + "train_station", + "transformer_tower", + "transportation", + "tree_house", + "trullo", + "university", + "warehouse", + "water_tower", + "windmill", + "yes" + ], + "type": "string" + }, + "name": { + "description": "A field for a designated name for an entity. Example: an official name for a trail.", + "type": "string" + }, + "opening_hours": { + "description": "A field for the opening hours of an entity. The value is in OpenStreetMap syntax for the opening_hours tag. See [OpenStreetMap specification](https://wiki.openstreetmap.org/wiki/Key:opening_hours/specification) on the formatting for this field.", + "type": "string" + }, + "_w_id": { + "items": { + "type": "string" + }, + "type": "array" + }, + "description": { + "description": "A free form text field for describing an entity. May be pre-encoded in relevant pedestrian paths to assist with routing instructing or investigation of map features. 
For example, a description of the sidewalk in relation to a nearby street may be a useful textual description, such as \"NE of Main St.\" Can also be considered a flexible location to embed arbitrary information for specific use cases.", + "type": "string" + }, + "foot": { + "description": "A field that indicates whether an edge can be used by pedestrians.", + "enum": [ + "designated", + "destination", + "no", + "permissive", + "private", + "use_sidepath", + "yes" + ], + "type": "string" + }, + "highway": { + "enum": [ + "pedestrian" + ], + "type": "string" + }, + "surface": { + "description": "A field for the surface material of the path.", + "enum": [ + "asphalt", + "concrete", + "dirt", + "grass", + "grass_paver", + "gravel", + "paved", + "paving_stones", + "unpaved" + ], + "type": "string" + } + }, + "required": [ + "_id" + ], + "patternProperties": { + "^ext:": {} + }, + "dependencies": { + "description": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + "foot": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + }, + "surface": { + "allOf": [ + { + "required": [ + "highway" + ], + "properties": { + "highway": { + "type": "string", + "const": "pedestrian" + } + } + } + ] + } + } + } + } + } + } + } +} \ No newline at end of file diff --git a/src/python_osw_validation/version.py b/src/python_osw_validation/version.py index 2418de5..0b1bdb2 100644 --- a/src/python_osw_validation/version.py +++ b/src/python_osw_validation/version.py @@ -1 +1 @@ -__version__ = '0.2.11' \ No newline at end of file +__version__ = '0.2.12' \ No newline at end of file diff --git a/tests/unit_tests/test_helpers.py b/tests/unit_tests/test_helpers.py new file mode 100644 index 0000000..4a4e316 --- /dev/null +++ b/tests/unit_tests/test_helpers.py @@ -0,0 +1,127 @@ +import unittest +import 
src.python_osw_validation.helpers as helpers + + +class FakeErr: + """Tiny stand-in for jsonschema_rs errors.""" + def __init__(self, instance_path=None, kind=None, validator=None, message="", schema_path=None): + self.instance_path = instance_path if instance_path is not None else [] + self.kind = kind + self.validator = validator + self.message = message + self.schema_path = schema_path if schema_path is not None else [] + + +# ----- tests for _feature_index_from_error ------------------------------------ +class TestFeatureIndexFromError(unittest.TestCase): + def test_feature_index_present(self): + e = FakeErr(instance_path=["features", 5, "properties", "x"]) + self.assertEqual(helpers._feature_index_from_error(e), 5) + + def test_feature_index_absent(self): + e = FakeErr(instance_path=["foo", "bar", 3]) # not "features/" + self.assertIsNone(helpers._feature_index_from_error(e)) + + def test_feature_index_next_not_int(self): + e = FakeErr(instance_path=["features", "not-an-int", "properties"]) + self.assertIsNone(helpers._feature_index_from_error(e)) + + +# ----- tests for _err_kind ----------------------------------------------------- +class TestErrKind(unittest.TestCase): + def test_prefers_kind_object(self): + # class name "Kind_Required" -> "Required" + KindRequired = type("Kind_Required", (), {}) + e = FakeErr(kind=KindRequired()) + self.assertEqual(helpers._err_kind(e), "Required") + + def test_fallback_to_validator(self): + e = FakeErr(kind=None, validator="anyOf") + self.assertEqual(helpers._err_kind(e), "AnyOf") + + def test_fallback_to_message(self): + e = FakeErr(kind=None, validator=None, message="... 
failed anyOf constraint ...")
+        self.assertEqual(helpers._err_kind(e), "AnyOf")
+
+    def test_empty_when_unknown(self):
+        e = FakeErr(kind=None, validator=None, message="totally unrelated")
+        self.assertEqual(helpers._err_kind(e), "")  # no kind, no validator, no recognizable text -> empty string
+
+
+# ----- tests for _clean_enum_message ------------------------------------------
+class TestCleanEnumMessage(unittest.TestCase):
+    def test_strips_other_candidates_and_trims(self):
+        e = FakeErr(message="value 'x' not permitted or 2 other candidates\nextra details here")
+        self.assertEqual(helpers._clean_enum_message(e), "value 'x' not permitted")  # " or N other candidates" tail and 2nd line expected gone
+
+    def test_no_noise_no_change(self):
+        e = FakeErr(message="must be one of [A,B,C]")
+        self.assertEqual(helpers._clean_enum_message(e), "must be one of [A,B,C]")  # already clean: returned unchanged
+
+
+# ----- tests for _pretty_message ----------------------------------------------
+class TestPrettyMessage(unittest.TestCase):
+    def test_enum_compacts_message(self):
+        KindEnum = type("Kind_Enum", (), {})  # ad-hoc empty class; presumably the helper keys off the class name -- confirm in helpers
+        e = FakeErr(kind=KindEnum(),
+                    message="not in allowed set or 3 other candidates\nignore this")
+        self.assertEqual(helpers._pretty_message(e, schema={}), "not in allowed set")
+
+    def test_anyof_unions_required_fields(self):
+        # Build a schema reachable via schema_path with anyOf/allOf nesting
+        schema = {
+            "properties": {
+                "features": {
+                    "items": {
+                        "anyOf": [
+                            {"required": ["a", "b"]},
+                            {"allOf": [
+                                {"required": ["c"]},
+                                {"anyOf": [{"required": ["d"]}]}
+                            ]},
+                        ]
+                    }
+                }
+            }
+        }
+        KindAnyOf = type("Kind_AnyOf", (), {})
+        e = FakeErr(
+            kind=KindAnyOf(),
+            schema_path=["properties", "features", "items", "anyOf"],  # same key sequence as the nesting of `schema` above
+            message="",
+        )
+        msg = helpers._pretty_message(e, schema)
+        # Union should be a,b,c,d — order is sorted in helper
+        self.assertEqual(msg, "must include one of: a, b, c, d")
+
+    def test_default_first_line_from_message(self):
+        e = FakeErr(kind=None, validator=None, message="first line only\nsecond line ignored")
+        self.assertEqual(helpers._pretty_message(e, schema={}), "first line only")
+
+
+# ----- tests for _rank_for -----------------------------------------------------
+class TestRankFor(unittest.TestCase):
+    def test_ordering_by_kind(self):
+        KEnum = type("Kind_Enum", (), {})
+        KReq = type("Kind_Required", (), {})
+        KPat = type("Kind_Pattern", (), {})
+        KOther = type("Kind_SomethingElse", (), {})
+
+        e_enum = FakeErr(kind=KEnum(), message="m1")
+        e_req = FakeErr(kind=KReq(), message="m2")
+        e_pat = FakeErr(kind=KPat(), message="m3")
+        e_other = FakeErr(kind=KOther(), message="m4")
+
+        self.assertLess(helpers._rank_for(e_enum), helpers._rank_for(e_req))  # asserted order: Enum < Required < Pattern < other
+        self.assertLess(helpers._rank_for(e_req), helpers._rank_for(e_pat))
+        self.assertLess(helpers._rank_for(e_pat), helpers._rank_for(e_other))
+
+    def test_tiebreaker_shorter_message_is_better(self):
+        KType = type("Kind_Type", (), {})
+        e_short = FakeErr(kind=KType(), message="short")
+        e_long = FakeErr(kind=KType(), message="a much longer message to increase length")
+        self.assertLess(helpers._rank_for(e_short), helpers._rank_for(e_long))  # same kind: the shorter message must compare lower
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/unit_tests/test_osw_validation_extras.py b/tests/unit_tests/test_osw_validation_extras.py
new file mode 100644
index 0000000..863ddd1
--- /dev/null
+++ b/tests/unit_tests/test_osw_validation_extras.py
@@ -0,0 +1,445 @@
+import os
+import unittest
+from unittest.mock import patch, MagicMock
+import geopandas as gpd
+from shapely.geometry import Point, LineString, Polygon
+
+import src.python_osw_validation as osw_mod
+from src.python_osw_validation import OSWValidation
+
+# Build a robust patch prefix from the module actually imported
+_PATCH_PREFIX = osw_mod.__name__
+_PATCH_UNIQUE = f"{_PATCH_PREFIX}.OSWValidation.are_ids_unique"
+_PATCH_ZIP = f"{_PATCH_PREFIX}.ZipFileHandler"
+_PATCH_EV = f"{_PATCH_PREFIX}.ExtractedDataValidator"
+_PATCH_READ_FILE = f"{_PATCH_PREFIX}.gpd.read_file"
+_PATCH_VALIDATE = f"{_PATCH_PREFIX}.OSWValidation.validate_osw_errors"
+_PATCH_DATASET_FILES = f"{_PATCH_PREFIX}.OSW_DATASET_FILES"
+
+# A tiny canonical mapping that matches our mocked basenames
+_CANON_DATASET_FILES = {
+    "nodes": {"geometry": "Point"},
+    "edges": {"geometry": "LineString"},
+    "zones": {"geometry": "Polygon"},
+}
+
+
+class TestOSWValidationExtras(unittest.TestCase):
+    """Additional tests covering edge-cases introduced by the new validator."""
+
+    # ---------- helpers to build small GeoDataFrames ----------
+    def _gdf_nodes(self, ids):  # one Point(0, i) row per id, CRS EPSG:4326
+        return gpd.GeoDataFrame(
+            {"_id": ids, "geometry": [Point(0, i) for i in range(len(ids))]},
+            geometry="geometry",
+            crs="EPSG:4326",
+        )
+
+    def _gdf_edges(self, u_ids=None, v_ids=None, n=1, ids=None):
+        """Edges with a default _id column to avoid KeyError in duplicated('_id')."""
+        if ids is None:
+            ids = list(range(1, n + 1))
+        data = {
+            "_id": ids,
+            "geometry": [LineString([(0, 0), (1, 1)]) for _ in range(n)],
+        }
+        if u_ids is not None:  # the _u_id / _v_id columns are only created when values are passed
+            data["_u_id"] = u_ids
+        if v_ids is not None:
+            data["_v_id"] = v_ids
+        return gpd.GeoDataFrame(data, geometry="geometry", crs="EPSG:4326")
+
+    def _gdf_zones(self, w_ids_lists, n=None, ids=None):
+        """Zones with a default _id column to avoid KeyError in duplicated('_id')."""
+        if n is None:
+            n = len(w_ids_lists)
+        if ids is None:
+            ids = list(range(1, n + 1))
+        polys = [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) for _ in range(n)]  # the same unit square for every row
+        return gpd.GeoDataFrame({"_id": ids, "_w_id": w_ids_lists, "geometry": polys},
+                                geometry="geometry", crs="EPSG:4326")
+
+    # ---------- shared fakes for Zip + ExtractedDataValidator ----------
+    def _fake_validator(self, files, external_exts=None, valid=True, error="folder invalid"):  # MagicMock with files / externalExtensions / is_valid() / error
+        val = MagicMock()
+        val.files = files
+        val.externalExtensions = external_exts or []
+        val.is_valid.return_value = valid
+        val.error = error
+        return val
+
+    # ---------------- tests ----------------
+
+    def test_missing_u_id_logged_and_no_keyerror(self):
+        """Edges missing `_u_id` should log a friendly error instead of raising KeyError."""
+        fake_files
= ["/tmp/nodes.geojson", "/tmp/edges.geojson"]
+        nodes = self._gdf_nodes([1, 2])
+        # edges WITHOUT _u_id; include _id to bypass duplicated('_id') KeyError
+        edges = self._gdf_edges(u_ids=None, v_ids=[1, 2], n=2, ids=[101, 102])
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE) as PRead, \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            z.remove_extracted_files.return_value = None
+            z.error = "extraction failed"
+            PZip.return_value = z
+
+            PVal.return_value = self._fake_validator(fake_files)
+
+            def _rf(path):  # stand-in for gpd.read_file: choose the frame from the file's basename
+                b = os.path.basename(path)
+                if "nodes" in b:
+                    return nodes
+                if "edges" in b:
+                    return edges
+                return gpd.GeoDataFrame()  # any other path gets an empty frame
+            PRead.side_effect = _rf
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid, f"Expected invalid; errors={res.errors}")
+            self.assertTrue(any("_u_id" in e and "Missing required column" in e for e in (res.errors or [])),
+                            f"Errors were: {res.errors}")
+
+    def test_unmatched_u_id_is_limited_to_20(self):
+        """When there are many unmatched _u_id values, only 20 are listed."""
+        fake_files = ["/tmp/nodes.geojson", "/tmp/edges.geojson"]
+        nodes = self._gdf_nodes([1, 2])
+        # edges have 25 u_ids (23 unmatched vs nodes {1,2}); include _id to bypass KeyError
+        edges = self._gdf_edges(u_ids=list(range(1, 26)), v_ids=list(range(1, 26)), n=25, ids=list(range(100, 125)))
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE) as PRead, \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            z.remove_extracted_files.return_value = None
+            PZip.return_value = z
+            PVal.return_value = self._fake_validator(fake_files)
+
+            def _rf(path):
+                b = os.path.basename(path)
+                return nodes if "nodes" in b else edges
+            PRead.side_effect = _rf
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid, f"Expected invalid; errors={res.errors}")
+            msg = next((e for e in (res.errors or []) if "_u_id" in e and "unmatched" in e), None)
+            self.assertIsNotNone(msg, f"Expected unmatched _u_id error not found. Errors: {res.errors}")
+            self.assertIn("Showing 20 out of", msg)
+            displayed = msg.split(":")[-1].strip()  # text after the last ':' -- assumed to be the comma-separated id list; confirm against the validator's message format
+            if displayed:  # the count check below is skipped when nothing follows the last ':'
+                shown_ids = [x.strip() for x in displayed.split(",")]
+                self.assertLessEqual(len(shown_ids), 20)
+
+    def test_unmatched_w_id_is_limited_to_20(self):  # same cap check as the _u_id test, for zone _w_id lists
+        fake_files = ["/tmp/nodes.geojson", "/tmp/zones.geojson"]
+        nodes = self._gdf_nodes([1, 2, 3])
+        # zones have many _w_id that do not exist in nodes; include _id to bypass KeyError
+        w_lists = [[i, i + 100] for i in range(1, 26)]  # each row has 2 ids (50 candidates)
+        zones = self._gdf_zones(w_lists, ids=list(range(1000, 1000 + len(w_lists))))
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE) as PRead, \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            PZip.return_value = z
+            PVal.return_value = self._fake_validator(fake_files)
+
+            def _rf(path):
+                b = os.path.basename(path)
+                return nodes if "nodes" in b else zones
+            PRead.side_effect = _rf
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid, f"Expected invalid; errors={res.errors}")
+            msg = next((e for e in (res.errors or []) if "_w_id" in e and "unmatched" in e), None)
+            self.assertIsNotNone(msg, f"Expected unmatched _w_id error not found. Errors: {res.errors}")
+            self.assertIn("Showing 20 out of", msg)
+            displayed = msg.split(":")[-1].strip()  # same parsing assumption as in the _u_id test
+            if displayed:
+                shown_ids = [x.strip() for x in displayed.split(",")]
+                self.assertLessEqual(len(shown_ids), 20)
+
+    def test_duplicate_ids_detection(self):
+        """Duplicates inside a single file are reported."""
+        fake_files = ["/tmp/nodes.geojson"]
+        nodes = self._gdf_nodes([1, 2, 2, 3])  # duplicate "2"
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE, return_value=nodes), \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            PZip.return_value = z
+            PVal.return_value = self._fake_validator(fake_files)
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid)
+            self.assertTrue(any("Duplicate _id's found in nodes" in e for e in (res.errors or [])))
+
+    def test_pick_schema_by_geometry_and_by_filename(self):
+        """Point/LineString/Polygon ⇒ proper schema; filename fallback when features empty."""
+        v = OSWValidation(zipfile_path="dummy.zip")
+
+        self.assertEqual(
+            v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Point"}}]}),
+            v.point_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "LineString"}}]}),
+            v.line_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Polygon"}}]}),
+            v.polygon_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/tmp/my.nodes.geojson", {"features": []}),  # no features: only the filename is left to drive the choice
+            v.point_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}),
+            v.line_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/tmp/my.zones.geojson", {"features": []}),
+            v.polygon_schema_path,
+        )
+
+    def test_zip_extract_failure_bubbles_as_error(self):
+        """If
zip extraction fails, we get a clean error and False result."""
+        with patch(_PATCH_ZIP) as PZip:
+            z = MagicMock()
+            z.extract_zip.return_value = None  # extract_zip() yields no directory: the failure path under test
+            z.error = "Failed to extract zip"
+            PZip.return_value = z
+
+            res = OSWValidation(zipfile_path="bad.zip").validate()
+            self.assertFalse(res.is_valid)
+            self.assertTrue(any("Failed to extract zip" in e for e in (res.errors or [])))
+
+    def test_extracted_data_validator_invalid(self):
+        """If folder structure is invalid, its error is surfaced."""
+        with patch(_PATCH_ZIP) as PZip, patch(_PATCH_EV) as PVal:
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            PZip.return_value = z
+
+            PVal.return_value = self._fake_validator(files=[], valid=False, error="bad structure")
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid)
+            self.assertTrue(any("bad structure" in e for e in (res.errors or [])))
+
+    def test_issues_populated_for_invalid_zip(self):
+        """Ensure `issues` contains per-feature messages when validation fails."""
+        assets = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "assets")  # <parent of this file's directory>/assets
+        invalid_zip = os.path.join(assets, "invalid.zip")
+        if os.path.exists(invalid_zip):  # real fixture available: run the validator for real
+            res = OSWValidation(zipfile_path=invalid_zip).validate()
+            self.assertFalse(res.is_valid)
+            self.assertIsInstance(res.issues, list)
+            self.assertGreater(len(res.issues), 0)
+            ex = res.issues[0]
+            self.assertIn("filename", ex)
+            self.assertIn("feature_index", ex)
+            self.assertIn("error_message", ex)
+        else:
+            # Mock a minimal run that forces one schema error → issues populated
+            fake_files = ["/tmp/nodes.geojson"]
+            nodes = self._gdf_nodes([1])
+
+            with patch(_PATCH_ZIP) as PZip, \
+                 patch(_PATCH_EV) as PVal, \
+                 patch(_PATCH_VALIDATE, autospec=True) as PSchema, \
+                 patch(_PATCH_READ_FILE, return_value=nodes), \
+                 patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+
+                z = MagicMock()
+                z.extract_zip.return_value = "/tmp/extracted"
+                PZip.return_value = z
+                PVal.return_value = self._fake_validator(fake_files)
+
+                def _schema_side_effect(self_obj, *args, **kwargs):  # self_obj is the validator instance only because the patch uses autospec=True
+                    # simulate legacy error and per-feature issue (a plain MagicMock on the class would not be bound, so no instance would arrive here)
+                    self_obj.errors.append("Validation error: dummy schema error")
+                    self_obj.issues.append({
+                        "filename": "nodes.geojson",
+                        "feature_index": 0,
+                        "error_message": ["dummy message"],
+                    })
+                    return True
+
+                PSchema.side_effect = _schema_side_effect
+
+                res = OSWValidation(zipfile_path="dummy.zip").validate()
+                self.assertFalse(res.is_valid)
+                self.assertGreater(len(res.issues), 0)
+                self.assertIn("dummy message", res.issues[0]["error_message"])
+
+class TestOSWValidationInternals(unittest.TestCase):
+    """Covers `_get_colset` and `pick_schema_for_file` internals."""
+
+    # ---------- helpers ----------
+    def _gdf(self, data, geom="Point"):  # one geometry per row; row count is taken from the first column in `data`
+        if geom == "Point":
+            g = [Point(0, i) for i in range(len(next(iter(data.values()))))]
+        elif geom == "LineString":
+            g = [LineString([(0, 0), (1, 1)]) for _ in range(len(next(iter(data.values()))))]
+        else:
+            g = [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) for _ in range(len(next(iter(data.values()))))]
+        data = {**data, "geometry": g}  # copy, so the caller's dict is not mutated
+        return gpd.GeoDataFrame(data, geometry="geometry", crs="EPSG:4326")
+
+    # ---------- _get_colset ----------
+    def test_get_colset_returns_set_when_column_present(self):
+        v = OSWValidation(zipfile_path="dummy.zip")
+        gdf = self._gdf({"_id": [1, 2, 2, None, 3]}, geom="Point")
+        s = v._get_colset(gdf, "_id", "nodes")
+        self.assertEqual(s, {1, 2, 3})  # duplicate 2 collapses and the None entry must not appear
+
+    def test_get_colset_logs_and_returns_empty_when_missing(self):
+        v = OSWValidation(zipfile_path="dummy.zip")
+        gdf = self._gdf({"foo": [1, 2]}, geom="Point")
+        s = v._get_colset(gdf, "_id", "nodes")
+        self.assertEqual(s, set())
+        self.assertTrue(any("Missing required column '_id' in nodes."
in e for e in v.errors))
+
+    def test_get_colset_handles_unhashable_by_stringifying(self):
+        v = OSWValidation(zipfile_path="dummy.zip")
+        # dicts are unhashable; method should stringify
+        gdf = self._gdf({"meta": [{"a": 1}, {"b": 2}, None]}, geom="Point")
+        s = v._get_colset(gdf, "meta", "nodes")
+        self.assertEqual(s, {str({"a": 1}), str({"b": 2})})  # the None entry is expected to be left out
+        # and should not log an error for existing column
+        self.assertFalse(any("meta" in e and "Could not create set" in e for e in (v.errors or [])))
+
+    def test_get_colset_with_none_gdf(self):  # a None frame must produce an empty set
+        v = OSWValidation(zipfile_path="dummy.zip")
+        s = v._get_colset(None, "_id", "nodes")
+        self.assertEqual(s, set())
+
+    # ---------- pick_schema_for_file ----------
+    def test_pick_schema_by_geometry(self):
+        v = OSWValidation(zipfile_path="dummy.zip")
+        self.assertEqual(
+            v.pick_schema_for_file("/x/y.json", {"features": [{"geometry": {"type": "Point"}}]}),  # path carries no nodes/edges/zones hint
+            v.point_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/x/y.json", {"features": [{"geometry": {"type": "LineString"}}]}),
+            v.line_schema_path,
+        )
+        self.assertEqual(
+            v.pick_schema_for_file("/x/y.json", {"features": [{"geometry": {"type": "Polygon"}}]}),
+            v.polygon_schema_path,
+        )
+
+    def test_pick_schema_filename_fallback(self):
+        v = OSWValidation(zipfile_path="dummy.zip")
+        self.assertEqual(v.pick_schema_for_file("/tmp/my.nodes.geojson", {"features": []}), v.point_schema_path)  # empty features in all three calls
+        self.assertEqual(v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}), v.line_schema_path)
+        self.assertEqual(v.pick_schema_for_file("/tmp/my.zones.geojson", {"features": []}), v.polygon_schema_path)
+
+    def test_pick_schema_force_single_schema_override(self):
+        force = "/forced/opensidewalks.schema.json"
+        v = OSWValidation(zipfile_path="dummy.zip", schema_file_path=force)
+        # should always return forced schema when provided
+        self.assertEqual(v.pick_schema_for_file("/tmp/my.edges.geojson", {"features": []}), force)
+
self.assertEqual(v.pick_schema_for_file("/any/path.json", {"features": [{"geometry": {"type": "Point"}}]}), force)
+
+
+class TestOSWValidationInvalidGeometryLogging(unittest.TestCase):
+    """Covers the 'invalid geometries' logging branch, including _id-present and _id-missing fallback."""
+
+    def _gdf_edges_wrong_geom(self, n, with_id=True):
+        # Expected geometry (from patched OSW_DATASET_FILES) is LineString,
+        # so we intentionally use Points to trigger invalids via type mismatch.
+        data = {}
+        if with_id:  # with_id=False leaves the frame without any _id column
+            data["_id"] = list(range(100, 100 + n))
+        # build Point geoms
+        pts = [Point(i, i) for i in range(n)]
+        data["geometry"] = pts
+        return gpd.GeoDataFrame(data, geometry="geometry", crs="EPSG:4326")
+
+    def _gdf_nodes(self, ids):  # not called by either test in this class
+        return gpd.GeoDataFrame(
+            {"_id": ids, "geometry": [Point(0, i) for i in range(len(ids))]},
+            geometry="geometry",
+            crs="EPSG:4326",
+        )
+
+    def _fake_validator(self, files, valid=True, error="folder invalid"):  # MagicMock with files / empty externalExtensions / is_valid() / error
+        m = MagicMock()
+        m.files = files
+        m.externalExtensions = []
+        m.is_valid.return_value = valid
+        m.error = error
+        return m
+
+    def test_invalid_geometry_logs_ids_when__id_present(self):
+        """When _id exists, the message should list some _id values and total count."""
+        fake_files = ["/tmp/edges.geojson"]
+        edges = self._gdf_edges_wrong_geom(n=3, with_id=True)
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE, return_value=edges), \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES):
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            z.remove_extracted_files.return_value = None
+            PZip.return_value = z
+            PVal.return_value = self._fake_validator(fake_files)
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid)
+            # Expect the invalid geometry message for 'edges'
+            msg = next((e for e in (res.errors or []) if "invalid edges geometries" in e), None)
+            self.assertIsNotNone(msg, f"No invalid-geometry message found. Errors: {res.errors}")
+            self.assertIn("Showing all out of 3", msg)  # only 3 invalid rows, so the message is expected to say "all"
+
+    def test_invalid_geometry_logs_index_when__id_missing_and_caps_20(self):
+        """When _id is missing, it falls back to index and caps display at 20 of N."""
+        fake_files = ["/tmp/edges.geojson"]
+        edges = self._gdf_edges_wrong_geom(n=25, with_id=False)  # 25 invalid features, no _id column
+
+        with patch(_PATCH_ZIP) as PZip, \
+             patch(_PATCH_EV) as PVal, \
+             patch(_PATCH_VALIDATE, return_value=True), \
+             patch(_PATCH_READ_FILE, return_value=edges), \
+             patch(_PATCH_DATASET_FILES, _CANON_DATASET_FILES), \
+             patch(_PATCH_UNIQUE, return_value=(True, [])):  # <-- bypass duplicates check entirely
+
+            z = MagicMock()
+            z.extract_zip.return_value = "/tmp/extracted"
+            z.remove_extracted_files.return_value = None
+            PZip.return_value = z
+            PVal.return_value = self._fake_validator(fake_files)
+
+            res = OSWValidation(zipfile_path="dummy.zip").validate()
+            self.assertFalse(res.is_valid, f"Expected invalid; errors={res.errors}")
+            msg = next((e for e in (res.errors or []) if "invalid edges geometries" in e), None)
+            self.assertIsNotNone(msg, f"No invalid-geometry message found. Errors: {res.errors}")
+            self.assertIn("Showing 20 out of 25", msg)
+
+
+if __name__ == "__main__":
+    unittest.main()