Skip to content

Commit 1569d9b

Browse files
committed
Few more modifications
1 parent c3f57c2 commit 1569d9b

File tree

7 files changed

+151
-10
lines changed

7 files changed

+151
-10
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
- Fixed [Task-1347](https://dev.azure.com/TDEI-UW/TDEI/_workitems/edit/1347/).
33
- Fixed the package to remove additional keys from the GeoJSON files.
44
- Introduced garbage collection to free up memory.
5+
- Added the ability to skip tags that are already present in the edges file.
6+
- Added the ability to process incline tags in batches.
57

68

79
### 0.0.1

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,12 @@ osw_incline = OSWIncline(
4747
# Perform the incline calculation, it will add the incline to the original edges file
4848
result = osw_incline.calculate()
4949

50+
# To skip the incline tags which are already present in the edges file
51+
result = osw_incline.calculate(skip_existing_tags=True)
52+
53+
# To update the incline tags using batch processing (this may be faster than the normal calculation, but it increases memory usage)
54+
result = osw_incline.calculate(batch_processing=True)
55+
5056
if result:
5157
print("Incline calculation completed successfully.")
5258
```

src/osw_incline/__init__.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def __init__(self, dem_files: List[str], nodes_file: str, edges_file: str, debug
1717
if self.debug:
1818
Logger.debug('Debug mode is enabled')
1919

20-
def calculate(self):
20+
def calculate(self, skip_existing_tags=False, batch_processing=False):
2121
try:
2222
if self.debug:
2323
Logger.debug('Starting calculation process')
@@ -33,10 +33,13 @@ def calculate(self):
3333
dem_processor = DEMProcessor(osm_graph=osm_graph, dem_files=self.dem_files, debug=self.debug)
3434
dem_processor.process(
3535
nodes_path=graph_nodes_path,
36-
edges_path=graph_edges_path
36+
edges_path=graph_edges_path,
37+
skip_existing_tags=skip_existing_tags,
38+
batch_processing=batch_processing
3739
)
3840

3941
# Delete osm_graph and dem_processor to force garbage collection
42+
osm_graph.clean()
4043
del osm_graph, dem_processor
4144
gc.collect()
4245

src/osw_incline/dem_processor.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def __init__(self, osm_graph: OSMGraph, dem_files: List[str], debug=False):
2222
self.OG = osm_graph
2323
self.debug = debug
2424

25-
def process(self, nodes_path, edges_path):
25+
def process(self, nodes_path, edges_path, skip_existing_tags=False, batch_processing=False):
2626
gc.disable()
2727
for dem_file in self.dem_files:
2828
dem_file_path = Path(dem_file)
@@ -31,8 +31,41 @@ def process(self, nodes_path, edges_path):
3131

3232
try:
3333
with rasterio.open(dem_file_path) as dem:
34-
edges = list(self.OG.G.edges(data=True)) # Get all edges, even if fewer than batch_size
35-
self._process_in_batches(edges, dem, batch_size=1000)
34+
"""
35+
Option 1:
36+
Pros:
37+
Batching: This approach processes edges in batches of 1000, which can be faster for large graphs.
38+
Parallelization: This approach can be parallelized by using a ThreadPoolExecutor or similar.
39+
Cons:
40+
Memory usage: This approach stores all edges in a list, which could be memory-intensive for large graphs.
41+
Intermediate list storage: This approach stores the entire edge set as a list in memory, which is not memory-efficient.
42+
"""
43+
if batch_processing:
44+
edges = list(self.OG.G.edges(data=True)) # Get all edges, even if fewer than batch_size
45+
self._process_in_batches(edges, dem, batch_size=1000, skip_existing_tags=skip_existing_tags)
46+
else:
47+
"""
48+
Option 2:
49+
Pros:
50+
Simple iteration: This approach iterates over the edges one by one, making the memory footprint relatively small, especially if you have a large number of edges.
51+
No intermediate list storage: It does not store the entire edge set as a list in memory, which is better for memory efficiency.
52+
Cons:
53+
Single-threaded: The entire edge processing happens sequentially, which can be slower for very large graphs, as there's no batching or parallelization.
54+
No batching: It processes all edges at once in a loop, which could cause memory spikes during large computations if infer_incline holds intermediate states or large datasets.
55+
"""
56+
for u, v, d in self.OG.G.edges(data=True):
57+
if 'geometry' in d:
58+
if skip_existing_tags:
59+
if 'incline' in d and d['incline'] is not None:
60+
# If incline already exists, skip
61+
continue
62+
incline = self.infer_incline(linestring=d['geometry'], dem=dem, precision=3)
63+
if incline is not None:
64+
# Add incline to the edge properties
65+
d['incline'] = incline
66+
else:
67+
if self.debug:
68+
Logger.info(f'No geometry found for edge {u}-{v}')
3669

3770
self.OG.to_geojson(nodes_path, edges_path)
3871
except rasterio.errors.RasterioIOError:
@@ -48,12 +81,16 @@ def process(self, nodes_path, edges_path):
4881

4982
gc.disable()
5083

51-
def _process_in_batches(self, edges, dem, batch_size=1000):
84+
def _process_in_batches(self, edges, dem, batch_size=1000, skip_existing_tags=False):
5285
# Process edges in batches
5386
for i in range(0, len(edges), batch_size):
5487
batch = edges[i:i + batch_size]
5588
for u, v, d in batch:
5689
if 'geometry' in d:
90+
if skip_existing_tags:
91+
if 'incline' in d and d['incline'] is not None:
92+
# If incline already exists, skip
93+
continue
5794
incline = self.infer_incline(linestring=d['geometry'], dem=dem, precision=3)
5895
if incline is not None:
5996
d['incline'] = incline

src/osw_incline/osm_graph.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,4 +151,8 @@ def to_geojson(self, *args):
151151

152152
# Delete point_features and force garbage collection
153153
del point_features, points_fc
154-
gc.collect()
154+
gc.collect()
155+
156+
def clean(self):
157+
del self.G
158+
gc.collect()

tests/test_dem_processor.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,28 @@ def test_process_success(self, mock_rasterio_open):
3939

4040
self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
4141

42+
@patch('src.osw_incline.dem_processor.rasterio.open')
43+
def test_process_success_with_batching(self, mock_rasterio_open):
44+
mock_dem = MagicMock()
45+
mock_rasterio_open.return_value.__enter__.return_value = mock_dem
46+
self.osm_graph.G.edges.return_value = [('u', 'v', {'geometry': LineString([(0, 0), (1, 1)])})]
47+
48+
with patch.object(self.processor, 'infer_incline', return_value=0.1):
49+
self.processor.process('nodes.json', 'edges.json', batch_processing=True)
50+
51+
self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
52+
53+
@patch('src.osw_incline.dem_processor.rasterio.open')
54+
def test_process_success_with_skip(self, mock_rasterio_open):
55+
mock_dem = MagicMock()
56+
mock_rasterio_open.return_value.__enter__.return_value = mock_dem
57+
self.osm_graph.G.edges.return_value = [('u', 'v', {'geometry': LineString([(0, 0), (1, 1)])})]
58+
59+
with patch.object(self.processor, 'infer_incline', return_value=0.1):
60+
self.processor.process('nodes.json', 'edges.json', skip_existing_tags=True)
61+
62+
self.osm_graph.to_geojson.assert_called_once_with('nodes.json', 'edges.json')
63+
4264
# Test processing when RasterioIOError is raised
4365
@patch('src.osw_incline.dem_processor.rasterio.open')
4466
def test_process_rasterio_io_error(self, mock_rasterio_open):

tests/test_osw_incline.py

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,78 @@ def test_calculate_success(self, mock_logger_info, mock_time, mock_dem_processor
4444
)
4545
mock_dem_processor.assert_called_once_with(
4646
nodes_path=Path(self.nodes_file),
47-
edges_path=Path(self.edges_file)
47+
edges_path=Path(self.edges_file),
48+
skip_existing_tags=False,
49+
batch_processing=False
50+
)
51+
52+
@patch.object(OSMGraph, 'from_geojson', return_value=MagicMock())
53+
@patch('src.osw_incline.dem_processor.DEMProcessor.process', return_value=None)
54+
@patch('time.time', side_effect=[1, 5]) # Simulate time taken for the calculation
55+
@patch.object(Logger, 'info') # Mock the Logger to capture log calls
56+
def test_calculate_success_with_skip_existing_tags(self, mock_logger_info, mock_time, mock_dem_processor, mock_osm_graph):
57+
result = self.osw_incline.calculate(skip_existing_tags=True)
58+
59+
# Check if the process was successful
60+
self.assertTrue(result)
61+
62+
# Ensure the OSMGraph and DEMProcessor were used correctly
63+
mock_osm_graph.assert_called_once_with(
64+
nodes_path=Path(self.nodes_file),
65+
edges_path=Path(self.edges_file),
66+
)
67+
mock_dem_processor.assert_called_once_with(
68+
nodes_path=Path(self.nodes_file),
69+
edges_path=Path(self.edges_file),
70+
skip_existing_tags=True,
71+
batch_processing=False
72+
)
73+
74+
@patch.object(OSMGraph, 'from_geojson', return_value=MagicMock())
75+
@patch('src.osw_incline.dem_processor.DEMProcessor.process', return_value=None)
76+
@patch('time.time', side_effect=[1, 5]) # Simulate time taken for the calculation
77+
@patch.object(Logger, 'info') # Mock the Logger to capture log calls
78+
def test_calculate_success_with_batch_processing(self, mock_logger_info, mock_time, mock_dem_processor,
79+
mock_osm_graph):
80+
result = self.osw_incline.calculate(batch_processing=True)
81+
82+
# Check if the process was successful
83+
self.assertTrue(result)
84+
85+
# Ensure the OSMGraph and DEMProcessor were used correctly
86+
mock_osm_graph.assert_called_once_with(
87+
nodes_path=Path(self.nodes_file),
88+
edges_path=Path(self.edges_file),
4889
)
90+
mock_dem_processor.assert_called_once_with(
91+
nodes_path=Path(self.nodes_file),
92+
edges_path=Path(self.edges_file),
93+
skip_existing_tags=False,
94+
batch_processing=True
95+
)
96+
97+
@patch.object(OSMGraph, 'from_geojson', return_value=MagicMock())
98+
@patch('src.osw_incline.dem_processor.DEMProcessor.process', return_value=None)
99+
@patch('time.time', side_effect=[1, 5]) # Simulate time taken for the calculation
100+
@patch.object(Logger, 'info') # Mock the Logger to capture log calls
101+
def test_calculate_success_with_batching_and_skip_existing_tags(self, mock_logger_info, mock_time, mock_dem_processor,
102+
mock_osm_graph):
103+
result = self.osw_incline.calculate(skip_existing_tags=True, batch_processing=True)
104+
105+
# Check if the process was successful
106+
self.assertTrue(result)
49107

50-
# Check if the time taken was logged
51-
mock_logger_info.assert_called_once_with('Entire processing took: 4 seconds')
108+
# Ensure the OSMGraph and DEMProcessor were used correctly
109+
mock_osm_graph.assert_called_once_with(
110+
nodes_path=Path(self.nodes_file),
111+
edges_path=Path(self.edges_file),
112+
)
113+
mock_dem_processor.assert_called_once_with(
114+
nodes_path=Path(self.nodes_file),
115+
edges_path=Path(self.edges_file),
116+
skip_existing_tags=True,
117+
batch_processing=True
118+
)
52119

53120
# Test when OSMGraph.from_geojson raises an exception
54121
@patch.object(OSMGraph, 'from_geojson', side_effect=Exception("OSMGraph Error"))

0 commit comments

Comments
 (0)