Skip to content

Commit 563caef

Browse files
committed
datadeps: Make IntervalTree find_overlapping non-recursive
1 parent 36b25dc commit 563caef

File tree

1 file changed

+147
-67
lines changed

1 file changed

+147
-67
lines changed

src/utils/interval_tree.jl

Lines changed: 147 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -140,15 +140,36 @@ end
140140
"""
    insert_node!(::Nothing, span::M) where M

Insert `span` into an empty (sub)tree, represented by `nothing`.

Returns a freshly constructed `IntervalNode(span)`, which becomes the new
(sub)tree root.
"""
insert_node!(::Nothing, span::M) where {M} = IntervalNode(span)
143-
"""
    insert_node!(root::IntervalNode{M,E}, span::M) where {M,E}

Insert `span` below `root` without recursion and return `root`.

The descent goes left when `span_start(span) <= span_start(current.span)` and
right otherwise; a new `IntervalNode(span)` is attached at the first empty child
slot reached. Every node visited on the way down is then passed to
`update_max_end!`, deepest node first and `root` last.
"""
function insert_node!(root::IntervalNode{M,E}, span::M) where {M,E}
    # Stack of the nodes visited during the descent (root first, parent of the
    # new leaf last); they are the ones needing `update_max_end!` afterwards.
    visited = IntervalNode{M,E}[]
    cursor = root

    # Walk down until an empty child slot is found on the chosen side.
    while true
        push!(visited, cursor)
        go_left = span_start(span) <= span_start(cursor.span)
        child = go_left ? cursor.left : cursor.right

        if child === nothing
            leaf = IntervalNode(span)
            if go_left
                cursor.left = leaf
            else
                cursor.right = leaf
            end
            break
        end

        cursor = child
    end

    # Bottom-up pass: a parent is only refreshed after the node below it.
    for ancestor in Iterators.reverse(visited)
        update_max_end!(ancestor)
    end

    return root
end
153174

154175
# Remove a specific span from the tree (split as needed)
@@ -162,44 +183,78 @@ end
162183
"""
    delete_node!(::Nothing, span::M) where M

Deleting from an empty (sub)tree is a no-op: there is nothing to remove, so the
result is still `nothing`.
"""
delete_node!(::Nothing, span::M) where {M} = nothing
165-
function delete_node!(node::IntervalNode{M,E}, span::M) where {M,E}
166-
# Check for exact match first
167-
if span_start(node.span) == span_start(span) && span_len(node.span) == span_len(span)
168-
# Exact match, remove the node
169-
if node.left === nothing && node.right === nothing
170-
return nothing
171-
elseif node.left === nothing
172-
return node.right
173-
elseif node.right === nothing
174-
return node.left
186+
function delete_node!(root::IntervalNode{M,E}, span::M) where {M,E}
187+
# Track the path to the target node: (node, direction_to_child)
188+
path = Vector{Tuple{IntervalNode{M,E}, Symbol}}()
189+
current = root
190+
target = nothing
191+
target_type = :none # :exact or :overlap
192+
193+
# Phase 1: Search for target node
194+
while current !== nothing
195+
is_exact = span_start(current.span) == span_start(span) && span_len(current.span) == span_len(span)
196+
is_overlap = !is_exact && spans_overlap(current.span, span)
197+
198+
if is_exact
199+
target = current
200+
target_type = :exact
201+
break
202+
elseif is_overlap
203+
target = current
204+
target_type = :overlap
205+
break
206+
elseif span_start(span) <= span_start(current.span)
207+
push!(path, (current, :left))
208+
current = current.left
175209
else
176-
# Node has two children - replace with inorder successor
177-
successor = find_min(node.right)
178-
node.span = successor.span
179-
node.right = delete_node!(node.right, successor.span)
210+
push!(path, (current, :right))
211+
current = current.right
180212
end
181-
# Check for overlap
182-
elseif spans_overlap(node.span, span)
183-
# Handle overlapping spans by removing current node and adding remainders
184-
original_span = node.span
185-
186-
# Remove the current node first (same logic as exact match)
187-
if node.left === nothing && node.right === nothing
188-
# Leaf node - remove it and create a new subtree with remainders
189-
remaining_node = nothing
190-
elseif node.left === nothing
191-
remaining_node = node.right
192-
elseif node.right === nothing
193-
remaining_node = node.left
213+
end
214+
215+
if target === nothing
216+
return root
217+
end
218+
219+
# Phase 2: Compute replacement for target node
220+
original_span = target.span
221+
succ_path = Vector{IntervalNode{M,E}}() # Path to successor (for max_end updates)
222+
local replacement::Union{IntervalNode{M,E}, Nothing}
223+
224+
if target.left === nothing && target.right === nothing
225+
# Leaf node
226+
replacement = nothing
227+
elseif target.left === nothing
228+
# Only right child
229+
replacement = target.right
230+
elseif target.right === nothing
231+
# Only left child
232+
replacement = target.left
233+
else
234+
# Two children - find and remove inorder successor
235+
successor = find_min(target.right)
236+
237+
if target.right === successor
238+
# Successor is direct right child
239+
target.right = successor.right
194240
else
195-
# Node has two children - replace with inorder successor
196-
successor = find_min(node.right)
197-
node.span = successor.span
198-
node.right = delete_node!(node.right, successor.span)
199-
remaining_node = node
241+
# Track path to successor for max_end updates
242+
succ_parent = target.right
243+
push!(succ_path, succ_parent)
244+
while succ_parent.left !== successor
245+
succ_parent = succ_parent.left
246+
push!(succ_path, succ_parent)
247+
end
248+
# Remove successor by replacing with its right child
249+
succ_parent.left = successor.right
200250
end
201251

202-
# Calculate and insert the remaining portions
252+
target.span = successor.span
253+
replacement = target
254+
end
255+
256+
# Phase 3: Handle overlap case - add remaining portions
257+
if target_type == :overlap
203258
original_start = span_start(original_span)
204259
original_end = span_end(original_span)
205260
del_start = span_start(span)
@@ -212,7 +267,7 @@ function delete_node!(node::IntervalNode{M,E}, span::M) where {M,E}
212267
if left_end > original_start
213268
left_span = M(original_start, left_end - original_start)
214269
if !isempty(left_span)
215-
remaining_node = insert_node!(remaining_node, left_span)
270+
replacement = insert_node!(replacement, left_span)
216271
end
217272
end
218273
end
@@ -223,22 +278,39 @@ function delete_node!(node::IntervalNode{M,E}, span::M) where {M,E}
223278
if original_end > right_start
224279
right_span = M(right_start, original_end - right_start)
225280
if !isempty(right_span)
226-
remaining_node = insert_node!(remaining_node, right_span)
281+
replacement = insert_node!(replacement, right_span)
227282
end
228283
end
229284
end
285+
end
230286

231-
return remaining_node
232-
elseif span_start(span) <= span_start(node.span)
233-
node.left = delete_node!(node.left, span)
287+
# Phase 4: Update parent's child pointer
288+
if isempty(path)
289+
root = replacement
234290
else
235-
node.right = delete_node!(node.right, span)
291+
parent, dir = path[end]
292+
if dir == :left
293+
parent.left = replacement
294+
else
295+
parent.right = replacement
296+
end
236297
end
237298

238-
if node !== nothing
239-
update_max_end!(node)
299+
# Phase 5: Update max_end in correct order (bottom-up)
300+
# First: successor path (if any)
301+
for i in length(succ_path):-1:1
302+
update_max_end!(succ_path[i])
240303
end
241-
return node
304+
# Second: target node (if it wasn't removed)
305+
if replacement === target
306+
update_max_end!(target)
307+
end
308+
# Third: main path (ancestors of target)
309+
for i in length(path):-1:1
310+
update_max_end!(path[i][1])
311+
end
312+
313+
return root
242314
end
243315

244316
function find_min(node::IntervalNode)
@@ -263,28 +335,36 @@ function find_overlapping!(::Nothing, query::M, result::Vector{M}; exact::Bool=t
263335
return
264336
end
265337
"""
    find_overlapping!(node::IntervalNode{M,E}, query::M, result::Vector{M}; exact::Bool=true)

Collect into `result` the spans stored under `node` that overlap `query`, using
an explicit FIFO work list instead of recursion (nodes are visited level by
level).

# Keywords
- `exact`: when `true`, the value pushed for an overlapping node is
  `span_diff(node.span, query)` (checked with `verify_span`, and skipped when
  empty); when `false`, the node's whole span is pushed.

Returns `nothing`; matches are appended to `result` in visiting order.
"""
function find_overlapping!(
    node::IntervalNode{M,E}, query::M, result::Vector{M}; exact::Bool=true
) where {M,E}
    # FIFO work list seeded with the starting node.
    pending = IntervalNode{M,E}[node]

    while !isempty(pending)
        visiting = popfirst!(pending)

        # Record this node if its span intersects the query.
        if spans_overlap(visiting.span, query)
            if !exact
                push!(result, visiting.span)
            else
                # NOTE(review): per the original comment, `span_diff` yields the
                # overlapping portion of the two spans — confirm against its definition.
                piece = span_diff(visiting.span, query)
                verify_span(piece)
                isempty(piece) || push!(result, piece)
            end
        end

        # The left subtree is only worth visiting when its `max_end` lies past
        # the start of the query.
        left_child = visiting.left
        if left_child !== nothing && left_child.max_end > span_start(query)
            push!(pending, left_child)
        end

        # The right subtree is only worth visiting when the query ends after
        # this node's start.
        right_child = visiting.right
        if right_child !== nothing && span_end(query) > span_start(visiting.span)
            push!(pending, right_child)
        end
    end

    return nothing
end
290370

0 commit comments

Comments
 (0)