Skip to content

Commit 199183d

Browse files
authored
Merge pull request #7 from TaskarCenterAtUW/dev
Dev to Main
2 parents 1979321 + aa868af commit 199183d

File tree

10 files changed

+232
-28
lines changed

10 files changed

+232
-28
lines changed

.github/workflows/publish_to_test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,5 +39,5 @@ jobs:
3939
uses: pypa/gh-action-pypi-publish@release/v1
4040
with:
4141
skip_existing: true
42-
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
42+
password: ${{ secrets.PYPI_API_TOKEN }}
4343
repository_url: https://test.pypi.org/legacy/

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
### 0.0.2
2+
- Fixed [Task-1347](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/1347/).
3+
- Fixed an issue where the package removed additional keys from the GeoJSON files.
4+
- Introduced garbage collection to free up memory.
5+
- Added the ability to skip incline tags that are already present in the edges file.
6+
- Added the ability to process incline tags in batches.
7+
8+
19
### 0.0.1
210
- Introduces osw_inclination package which calculates the inclination of the sidewalk based on the DEM data.
311
- Added example.py file which demonstrates how to use the package.

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ osw_incline = OSWIncline(
4747
# Perform the incline calculation, it will add the incline to the original edges file
4848
result = osw_incline.calculate()
4949

50+
# To skip the incline tags which are already present in the edges file
51+
result = osw_incline.calculate(skip_existing_tags=True)
52+
53+
# To update the incline tags using batch processing (this may be faster than the normal calculation, but it increases memory usage)
54+
result = osw_incline.calculate(batch_processing=True)
55+
5056
if result:
5157
print("Incline calculation completed successfully.")
5258
```

src/osw_incline/__init__.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import gc
12
import time
23
from typing import List
34
from pathlib import Path
@@ -6,6 +7,7 @@
67
from .version import __version__
78
from .dem_processor import DEMProcessor
89

10+
911
class OSWIncline:
1012
def __init__(self, dem_files: List[str], nodes_file: str, edges_file: str, debug=False):
1113
self.dem_files = dem_files
@@ -15,8 +17,7 @@ def __init__(self, dem_files: List[str], nodes_file: str, edges_file: str, debug
1517
if self.debug:
1618
Logger.debug('Debug mode is enabled')
1719

18-
19-
def calculate(self):
20+
def calculate(self, skip_existing_tags=False, batch_processing=False):
2021
try:
2122
if self.debug:
2223
Logger.debug('Starting calculation process')
@@ -32,8 +33,16 @@ def calculate(self):
3233
dem_processor = DEMProcessor(osm_graph=osm_graph, dem_files=self.dem_files, debug=self.debug)
3334
dem_processor.process(
3435
nodes_path=graph_nodes_path,
35-
edges_path=graph_edges_path
36+
edges_path=graph_edges_path,
37+
skip_existing_tags=skip_existing_tags,
38+
batch_processing=batch_processing
3639
)
40+
41+
# Delete osm_graph and dem_processor to force garbage collection
42+
osm_graph.clean()
43+
del osm_graph, dem_processor
44+
gc.collect()
45+
3746
end_time = time.time()
3847
time_taken = end_time - start_time
3948
if self.debug:
@@ -43,7 +52,8 @@ def calculate(self):
4352
if self.debug:
4453
Logger.error(f'Error processing DEM files: {e}')
4554
raise Exception(f'Error processing DEM files: {e}')
55+
finally:
56+
gc.collect()
4657

4758

48-
49-
OSWIncline.__version__ = __version__
59+
OSWIncline.__version__ = __version__

src/osw_incline/dem_processor.py

Lines changed: 61 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import gc
12
import math
23
import pyproj
34
import rasterio
@@ -21,23 +22,50 @@ def __init__(self, osm_graph: OSMGraph, dem_files: List[str], debug=False):
2122
self.OG = osm_graph
2223
self.debug = debug
2324

24-
def process(self, nodes_path, edges_path):
25+
def process(self, nodes_path, edges_path, skip_existing_tags=False, batch_processing=False):
26+
gc.disable()
2527
for dem_file in self.dem_files:
2628
dem_file_path = Path(dem_file)
2729
if self.debug:
2830
Logger.debug(f'Processing DEM tile: {dem_file_path}')
2931

3032
try:
3133
with rasterio.open(dem_file_path) as dem:
32-
for u, v, d in self.OG.G.edges(data=True):
33-
if 'geometry' in d:
34-
incline = self.infer_incline(linestring=d['geometry'], dem=dem, precision=3)
35-
if incline is not None:
36-
# Add incline to the edge properties
37-
d['incline'] = incline
38-
else:
39-
if self.debug:
40-
Logger.info(f'No geometry found for edge {u}-{v}')
34+
"""
35+
Option 1:
36+
Pros:
37+
Batching: This approach processes edges in batches of 1000, which can be faster for large graphs.
38+
Parallelization: The second approach can be parallelized by using a ThreadPoolExecutor or similar.
39+
Cons:
40+
Memory usage: The second approach stores all edges in a list, which could be memory-intensive for large graphs.
41+
Intermediate list storage: The second approach stores the entire edge set as a list in memory, which is not memory-efficient.
42+
"""
43+
if batch_processing:
44+
edges = list(self.OG.G.edges(data=True)) # Get all edges, even if fewer than batch_size
45+
self._process_in_batches(edges, dem, batch_size=1000, skip_existing_tags=skip_existing_tags)
46+
else:
47+
"""
48+
Option 2:
49+
Pros:
50+
Simple iteration: The first approach iterates over the edges one by one, making the memory footprint relatively small, especially if you have a large number of edges.
51+
No intermediate list storage: It does not store the entire edge set as a list in memory, which is better for memory efficiency.
52+
Cons:
53+
Single-threaded: The entire edge processing happens sequentially, which can be slower for very large graphs, as there's no batching or parallelization.
54+
No batching: It processes all edges at once in a loop, which could cause memory spikes during large computations if infer_incline holds intermediate states or large datasets.
55+
"""
56+
for u, v, d in self.OG.G.edges(data=True):
57+
if 'geometry' in d:
58+
if skip_existing_tags:
59+
if 'incline' in d and d['incline'] is not None:
60+
# If incline already exists, skip
61+
continue
62+
incline = self.infer_incline(linestring=d['geometry'], dem=dem, precision=3)
63+
if incline is not None:
64+
# Add incline to the edge properties
65+
d['incline'] = incline
66+
else:
67+
if self.debug:
68+
Logger.info(f'No geometry found for edge {u}-{v}')
4169

4270
self.OG.to_geojson(nodes_path, edges_path)
4371
except rasterio.errors.RasterioIOError:
@@ -48,6 +76,26 @@ def process(self, nodes_path, edges_path):
4876
if self.debug:
4977
Logger.error(f'Error processing DEM file: {dem_file_path}, error: {e}')
5078
raise Exception(f'Error processing DEM file: {dem_file_path}, error: {e}')
79+
finally:
80+
gc.collect()
81+
82+
gc.disable()
83+
84+
def _process_in_batches(self, edges, dem, batch_size=1000, skip_existing_tags=False):
    """Write an ``incline`` value onto edges, one slice of the edge list at a time.

    Args:
        edges: Sliceable sequence of ``(u, v, data)`` tuples; each ``data``
            dict is updated in place.
        dem: Opened DEM dataset, passed straight through to ``infer_incline``.
        batch_size: Number of edges taken per slice.
        skip_existing_tags: When true, an edge whose ``incline`` entry is
            present and not ``None`` is left as it is.
    """
    edge_count = len(edges)
    for start in range(0, edge_count, batch_size):
        for _u, _v, attrs in edges[start:start + batch_size]:
            # Without a geometry there is nothing to measure.
            if 'geometry' not in attrs:
                continue
            if skip_existing_tags and attrs.get('incline') is not None:
                continue
            value = self.infer_incline(linestring=attrs['geometry'], dem=dem, precision=3)
            if value is not None:
                attrs['incline'] = value
        # One collection pass per finished slice.
        gc.collect()
5199

52100
def infer_incline(self, linestring, dem, precision=3):
53101
first_point = linestring.coords[0]
@@ -154,6 +202,7 @@ def interpolated_value(self, x, y, dem, method='idw', scaling_factor=1.0):
154202

155203
interpolated = interpolator(dx, dy, dem_arr)
156204

205+
del dem_arr
157206
if interpolated is None:
158207
return interpolated
159208
else:
@@ -187,6 +236,7 @@ def idw(self, dx, dy, masked_array):
187236

188237
value = weighted_values.sum()
189238

239+
del xs, ys, values_masked, weighted_values
190240

191241
if np.isnan(value):
192242
return None
@@ -210,4 +260,4 @@ def bivariate_spline(self, dx, dy, arr):
210260
spline = RectBivariateSpline(
211261
np.array(range(ncol)), np.array(range(nrow)), arr, kx=kx, ky=ky
212262
)
213-
return spline(dx, dy)[0][0]
263+
return spline(dx, dy)[0][0]

src/osw_incline/osm_graph.py

Lines changed: 42 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import gc
12
import json
23
import pyproj
34
import networkx as nx
45
from shapely.geometry import shape, mapping
56

7+
SCHEMA = 'https://sidewalks.washington.edu/opensidewalks/0.2/schema.json'
68

79
class OSMGraph:
810
def __init__(self, G=None):
@@ -29,13 +31,19 @@ def from_geojson(cls, nodes_path, edges_path):
2931
props['geometry'] = shape(node_feature['geometry'])
3032
G.add_node(n, **props)
3133

34+
del nodes_fc
35+
gc.collect()
36+
3237
for edge_feature in edges_fc['features']:
3338
props = edge_feature['properties']
3439
u = props.pop('_u_id')
3540
v = props.pop('_v_id')
3641
props['geometry'] = shape(edge_feature['geometry'])
3742
G.add_edge(u, v, **props)
3843

44+
del edges_fc
45+
gc.collect()
46+
3947
return osm_graph
4048

4149
def to_geojson(self, *args):
@@ -58,7 +66,18 @@ def to_geojson(self, *args):
5866
'geometry': geometry,
5967
'properties': d_copy
6068
})
61-
edges_fc = {'type': 'FeatureCollection', 'features': edge_features}
69+
edges_fc = {
70+
'type': 'FeatureCollection',
71+
'features': edge_features,
72+
'$schema': SCHEMA
73+
}
74+
75+
with open(edges_path, 'w') as f:
76+
json.dump(edges_fc, f)
77+
78+
# Delete edge_features and force garbage collection
79+
del edge_features, edges_fc
80+
gc.collect()
6281

6382
node_features = []
6483
for n, d in self.G.nodes(data=True):
@@ -82,14 +101,19 @@ def to_geojson(self, *args):
82101
'geometry': geometry,
83102
'properties': d_copy
84103
})
85-
nodes_fc = {'type': 'FeatureCollection', 'features': node_features}
86-
87-
with open(edges_path, 'w') as f:
88-
json.dump(edges_fc, f)
104+
nodes_fc = {
105+
'type': 'FeatureCollection',
106+
'features': node_features,
107+
'$schema': SCHEMA
108+
}
89109

90110
with open(nodes_path, 'w') as f:
91111
json.dump(nodes_fc, f)
92112

113+
# Delete node_features and force garbage collection
114+
del node_features, nodes_fc
115+
gc.collect()
116+
93117
if len(args) == 3:
94118
points_path = args[2]
95119
point_features = []
@@ -116,7 +140,19 @@ def to_geojson(self, *args):
116140
'geometry': geometry,
117141
'properties': d_copy
118142
})
119-
points_fc = {'type': 'FeatureCollection', 'features': point_features}
143+
points_fc = {
144+
'type': 'FeatureCollection',
145+
'features': point_features,
146+
'$schema': SCHEMA
147+
}
120148

121149
with open(points_path, 'w') as f:
122150
json.dump(points_fc, f)
151+
152+
# Delete point_features and force garbage collection
153+
del point_features, points_fc
154+
gc.collect()
155+
156+
def clean(self):
    """Remove the ``G`` attribute from this instance, then run a garbage-collection pass."""
    delattr(self, 'G')
    gc.collect()

src/osw_incline/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '0.0.1'
1+
__version__ = '0.0.2'

tests/test_dem_processor.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ def test_process_success(self, mock_rasterio_open):
3939

4040
self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
4141

42+
@patch('src.osw_incline.dem_processor.rasterio.open')
def test_process_success_with_batching(self, mock_rasterio_open):
    """Batch mode tags the edge with the inferred incline and writes the graph once."""
    mock_dem = MagicMock()
    mock_rasterio_open.return_value.__enter__.return_value = mock_dem
    edge_data = {'geometry': LineString([(0, 0), (1, 1)])}
    self.osm_graph.G.edges.return_value = [('u', 'v', edge_data)]

    with patch.object(self.processor, 'infer_incline', return_value=0.1):
        self.processor.process('nodes.json', 'edges.json', batch_processing=True)

    # The batch path must actually write the inferred value onto the edge.
    self.assertEqual(edge_data['incline'], 0.1)
    self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
52+
53+
@patch('src.osw_incline.dem_processor.rasterio.open')
def test_process_success_with_skip(self, mock_rasterio_open):
    """With skip_existing_tags, an already-tagged edge is left alone and an untagged one is filled in."""
    mock_dem = MagicMock()
    mock_rasterio_open.return_value.__enter__.return_value = mock_dem
    untagged = {'geometry': LineString([(0, 0), (1, 1)])}
    tagged = {'geometry': LineString([(1, 1), (2, 2)]), 'incline': 0.05}
    self.osm_graph.G.edges.return_value = [('u', 'v', untagged), ('v', 'w', tagged)]

    with patch.object(self.processor, 'infer_incline', return_value=0.1) as infer_mock:
        self.processor.process('nodes.json', 'edges.json', skip_existing_tags=True)

    # Only the edge without an incline may reach infer_incline.
    infer_mock.assert_called_once()
    self.assertEqual(untagged['incline'], 0.1)
    self.assertEqual(tagged['incline'], 0.05)
    self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
63+
4264
# Test processing when RasterioIOError is raised
4365
@patch('src.osw_incline.dem_processor.rasterio.open')
4466
def test_process_rasterio_io_error(self, mock_rasterio_open):
@@ -401,7 +423,6 @@ def test_interpolated_value_return_scaled(self, mock_rasterio_open):
401423

402424
# Assert that the result is not None and that it is scaled properly
403425
self.assertIsNotNone(result, 'IDW interpolation should return a valid value')
404-
print(f"IDW Interpolated Value (scaled): {result}")
405426

406427
# You can adjust the expected result based on the IDW logic; here, just check that it's non-zero
407428
self.assertGreater(result, 0, 'The interpolated result should be greater than 0')

tests/test_osm_graph.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,12 @@ def test_to_geojson_with_points_and_osm_id(self):
256256
nodes_data = json.load(f)
257257
self.assertEqual(len(nodes_data['features']), 0)
258258

259+
def test_clean(self):
    """clean() strips the G attribute from a graph loaded from the fixture files."""
    graph = OSMGraph.from_geojson(self.nodes_geojson, self.edges_geojson)
    node_total = len(graph.G.nodes)
    self.assertEqual(node_total, 3)

    graph.clean()

    self.assertFalse(hasattr(graph, 'G'))
264+
259265

260266
if __name__ == '__main__':
261267
unittest.main()

0 commit comments

Comments
 (0)