1+ import gc
12import math
23import pyproj
34import rasterio
@@ -21,23 +22,50 @@ def __init__(self, osm_graph: OSMGraph, dem_files: List[str], debug=False):
2122 self .OG = osm_graph
2223 self .debug = debug
2324
24- def process (self , nodes_path , edges_path ):
25+ def process (self , nodes_path , edges_path , skip_existing_tags = False , batch_processing = False ):
26+ gc .disable ()
2527 for dem_file in self .dem_files :
2628 dem_file_path = Path (dem_file )
2729 if self .debug :
2830 Logger .debug (f'Processing DEM tile: { dem_file_path } ' )
2931
3032 try :
3133 with rasterio .open (dem_file_path ) as dem :
32- for u , v , d in self .OG .G .edges (data = True ):
33- if 'geometry' in d :
34- incline = self .infer_incline (linestring = d ['geometry' ], dem = dem , precision = 3 )
35- if incline is not None :
36- # Add incline to the edge properties
37- d ['incline' ] = incline
38- else :
39- if self .debug :
40- Logger .info (f'No geometry found for edge { u } -{ v } ' )
34+ """
35+ Option 1:
36+ Pros:
37+ Batching: This approach processes edges in batches of 1000, which can be faster for large graphs.
38+ Parallelization: This approach can be parallelized by using a ThreadPoolExecutor or similar.
39+ Cons:
40+ Memory usage: This approach stores all edges in a list, which could be memory-intensive for large graphs.
41+ Intermediate list storage: This approach stores the entire edge set as a list in memory, which is not memory-efficient.
42+ """
43+ if batch_processing :
44+ edges = list (self .OG .G .edges (data = True )) # Get all edges, even if fewer than batch_size
45+ self ._process_in_batches (edges , dem , batch_size = 1000 , skip_existing_tags = skip_existing_tags )
46+ else :
47+ """
48+ Option 2:
49+ Pros:
50+ Simple iteration: This approach iterates over the edges one by one, making the memory footprint relatively small, especially if you have a large number of edges.
51+ No intermediate list storage: It does not store the entire edge set as a list in memory, which is better for memory efficiency.
52+ Cons:
53+ Single-threaded: The entire edge processing happens sequentially, which can be slower for very large graphs, as there's no batching or parallelization.
54+ No batching: It processes all edges at once in a loop, which could cause memory spikes during large computations if infer_incline holds intermediate states or large datasets.
55+ """
56+ for u , v , d in self .OG .G .edges (data = True ):
57+ if 'geometry' in d :
58+ if skip_existing_tags :
59+ if 'incline' in d and d ['incline' ] is not None :
60+ # If incline already exists, skip
61+ continue
62+ incline = self .infer_incline (linestring = d ['geometry' ], dem = dem , precision = 3 )
63+ if incline is not None :
64+ # Add incline to the edge properties
65+ d ['incline' ] = incline
66+ else :
67+ if self .debug :
68+ Logger .info (f'No geometry found for edge { u } -{ v } ' )
4169
4270 self .OG .to_geojson (nodes_path , edges_path )
4371 except rasterio .errors .RasterioIOError :
@@ -48,6 +76,26 @@ def process(self, nodes_path, edges_path):
4876 if self .debug :
4977 Logger .error (f'Error processing DEM file: { dem_file_path } , error: { e } ' )
5078 raise Exception (f'Error processing DEM file: { dem_file_path } , error: { e } ' )
79+ finally :
80+ gc .collect ()
81+
82+ gc .disable ()
83+
def _process_in_batches(self, edges, dem, batch_size=1000, skip_existing_tags=False):
    """Infer an incline for every edge and store it on the edge, batch by batch.

    Args:
        edges: Sequence of ``(u, v, data)`` tuples; it must support ``len()``
            and slicing. ``data`` is the edge's attribute mapping and is
            updated in place.
        dem: DEM dataset handle, passed through unchanged to
            ``infer_incline``.
        batch_size: Number of edges handled between two explicit
            ``gc.collect()`` calls. Must be at least 1.
        skip_existing_tags: When true, edges whose ``'incline'`` entry is
            already set to a non-``None`` value are left untouched.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # range() rejects a zero step with an unrelated message and yields
        # nothing for a negative one, which would silently skip every edge.
        raise ValueError(f'batch_size must be a positive integer, got {batch_size}')

    for start in range(0, len(edges), batch_size):
        for u, v, d in edges[start:start + batch_size]:
            if 'geometry' not in d:
                # Same debug message the non-batched loop in process() emits.
                if self.debug:
                    Logger.info(f'No geometry found for edge {u}-{v}')
                continue

            if skip_existing_tags and d.get('incline') is not None:
                # An incline is already present; keep it.
                continue

            incline = self.infer_incline(linestring=d['geometry'], dem=dem, precision=3)
            if incline is not None:
                d['incline'] = incline

        # Collect explicitly once per batch.
        gc.collect()
5199
52100 def infer_incline (self , linestring , dem , precision = 3 ):
53101 first_point = linestring .coords [0 ]
@@ -154,6 +202,7 @@ def interpolated_value(self, x, y, dem, method='idw', scaling_factor=1.0):
154202
155203 interpolated = interpolator (dx , dy , dem_arr )
156204
205+ del dem_arr
157206 if interpolated is None :
158207 return interpolated
159208 else :
@@ -187,6 +236,7 @@ def idw(self, dx, dy, masked_array):
187236
188237 value = weighted_values .sum ()
189238
239+ del xs , ys , values_masked , weighted_values
190240
191241 if np .isnan (value ):
192242 return None
@@ -210,4 +260,4 @@ def bivariate_spline(self, dx, dy, arr):
210260 spline = RectBivariateSpline (
211261 np .array (range (ncol )), np .array (range (nrow )), arr , kx = kx , ky = ky
212262 )
213- return spline (dx , dy )[0 ][0 ]
263+ return spline (dx , dy )[0 ][0 ]
0 commit comments