tskit-dev · hossam26644 · Jul 29, 2025 · Dec 2, 2025 · Dec 3, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,11 +7,10 @@
 
 - The `FixedPedigree` simulation model now supports internal samples ({issue}`1855`,
   {pr}`2321`, {pr}`2326`, {pr}`2388`, {user}`abureau`, {user}`jeromekelleher`).
-
 - Add support and wheels for Python3.13
-
+- Support for simulations after local mrca,
+  ({issue}`2157`, {pr}`2396`, {user}`jeromekelleher`, {user}`hossam26644`).
 - Drop Python 3.9 support, require Python >= 3.10 ({pr}`2418`, {user}`benjeffery`)
-
 - Add wheels on Windows ({pr}`2414`, {issue}`2200`,{user}`benjeffery`)
 
 **Breaking changes**:

diff --git a/algorithms.py b/algorithms.py
@@ -19,6 +19,7 @@
 
 
 logger = daiquiri.getLogger()
+INFINITY = sys.float_info.max
 
 
 class FenwickTree:
@@ -247,7 +248,7 @@ def _get_common_ancestor_waiting_time(self, np, t):
         Returns the random waiting time until a common ancestor event
         occurs within this population.
         """
-        ret = sys.float_info.max
+        ret = INFINITY
         u = random.expovariate(2 * np)
         if self.growth_rate == 0:
             ret = self.start_size * u
@@ -267,7 +268,7 @@ def _get_common_ancestor_waiting_time(self, np, t):
     def get_common_ancestor_waiting_time_hudson(self):
         def _get_common_ancestor_waiting_time_hudson(t):
             k = self.get_num_ancestors()
-            ret = sys.float_info.max
+            ret = INFINITY
             if k > 1:
                 np = k * (k - 1) / 2
                 ret = self._get_common_ancestor_waiting_time(np, t)
@@ -278,7 +279,7 @@ def _get_common_ancestor_waiting_time_hudson(t):
     def get_common_ancestor_waiting_time_smc_k(self):
         def _get_common_ancestor_waiting_time_smc_k(t):
             np = self.get_num_pairs()
-            ret = sys.float_info.max
+            ret = INFINITY
             if np > 0:
                 ret = self._get_common_ancestor_waiting_time(np, t)
             return ret
@@ -885,6 +886,7 @@ def __init__(
         gene_conversion_length=1,
         discrete_genome=True,
         hull_offset=None,
+        stop_at_local_mrca=True,
     ):
         # Must be a square matrix.
         N = len(migration_matrix)
@@ -906,6 +908,7 @@ def __init__(
         self.migration_matrix = migration_matrix
         self.num_labels = num_labels
         self.num_populations = N
+        self.stop_at_local_mrca = stop_at_local_mrca
         self.max_segments = max_segments
         self.coalescing_segments_only = coalescing_segments_only
         self.additional_nodes = msprime.NodeType(additional_nodes)
@@ -964,7 +967,7 @@ def __init__(
         self.sweep_trajectory = sweep_trajectory
         self.time_slice = time_slice
 
-        self.modifier_events = [(sys.float_info.max, None, None)]
+        self.modifier_events = [(INFINITY, None, None)]
         for time, pop_id, new_size in population_size_changes:
             self.modifier_events.append(
                 (time, self.change_population_size, (int(pop_id), new_size))
@@ -1046,13 +1049,6 @@ def initialise(self, ts):
                     seg = seg.next
                 self.add_lineage(lineage)
 
-    def ancestors_remain(self):
-        """
-        Returns True if the simulation is not finished, i.e., there is some ancestral
-        material that has not fully coalesced.
-        """
-        return sum(pop.get_num_ancestors() for pop in self.P) != 0
-
     def change_population_size(self, pop_id, size):
         self.P[pop_id].set_start_size(size)
 
@@ -1253,7 +1249,7 @@ def simulate(self, end_time):
         if self.model == "hudson":
             self.hudson_simulate(end_time)
         elif self.model == "dtwf":
-            self.dtwf_simulate()
+            self.dtwf_simulate(end_time)
         elif self.model == "fixed_pedigree":
             self.pedigree_simulate()
         elif self.model == "single_sweep":
@@ -1311,46 +1307,51 @@ def find_cleft_individual(self, label, cleft_value):
             individual_index -= num_ancestors
         raise AssertionError()
 
+    def is_completed(self):
+        for x in self.S.values():
+            if x > 1:
+                return False
+        return True
+
     def hudson_simulate(self, end_time):
         """
         Simulates the algorithm until all loci have coalesced.
         """
-        infinity = sys.float_info.max
         non_empty_pops = {pop.id for pop in self.P if pop.get_num_ancestors() > 0}
         potential_destinations = self.get_potential_destinations()
 
         # only worried about label 0 below
-        while len(non_empty_pops) > 0:
+        while not self.is_completed():
             self.verify()
             if self.t >= end_time:
                 break
             # self.print_state()
             re_rate = self.get_total_recombination_rate(label=0)
-            t_re = infinity
+            t_re = INFINITY
             if re_rate > 0:
                 t_re = random.expovariate(re_rate)
 
             # Gene conversion can occur within segments ..
             gc_rate = self.get_total_gc_rate(label=0)
-            t_gcin = infinity
+            t_gcin = INFINITY
             if gc_rate > 0:
                 t_gcin = random.expovariate(gc_rate)
             # ... or to the left of the first segment.
             gc_left_rate = self.get_total_gc_left_rate(label=0)
-            t_gc_left = infinity
+            t_gc_left = INFINITY
             if gc_left_rate > 0:
                 t_gc_left = random.expovariate(gc_left_rate)
 
             # Common ancestor events occur within demes.
-            t_ca = infinity
+            t_ca = INFINITY
             for index in non_empty_pops:
                 pop = self.P[index]
                 assert pop.get_num_ancestors() > 0
                 t = pop.get_common_ancestor_waiting_time(self.t)
                 if t < t_ca:
                     t_ca = t
                     ca_population = index
-            t_mig = infinity
+            t_mig = INFINITY
             # Migration events happen at the rates in the matrix.
             for j in non_empty_pops:
                 source_size = self.P[j].get_num_ancestors()
@@ -1365,7 +1366,7 @@ def hudson_simulate(self, end_time):
                         mig_source = j
                         mig_dest = k
             min_time = min(t_re, t_ca, t_gcin, t_gc_left, t_mig)
-            assert min_time != infinity
+            assert min_time != INFINITY
             if self.t + min_time > self.modifier_events[0][0]:
                 t, func, args = self.modifier_events.pop(0)
                 self.t = t
@@ -1400,6 +1401,7 @@ def hudson_simulate(self, end_time):
                         non_empty_pops.remove(mig_source)
                     assert self.P[mig_dest].get_num_ancestors() > 0
                     non_empty_pops.add(mig_dest)
+
             logger.info(
                 "%s time=%f n=%d",
                 event,
@@ -1441,7 +1443,7 @@ def single_sweep_simulate(self):
         # main loop time
         t_inc_orig = self.time_slice
         e_time = 0.0
-        while self.ancestors_remain() and sweep_traj_step < len(times) - 1:
+        while sweep_traj_step < len(times) - 1 and not self.is_completed():
             self.verify()
             event_prob = 1.0
             while event_prob > random.random() and sweep_traj_step < len(times) - 1:
@@ -1524,13 +1526,16 @@ def pedigree_simulate(self):
         self.pedigree = Pedigree(self.tables)
         self.dtwf_climb_pedigree()
 
-    def dtwf_simulate(self):
+    def dtwf_simulate(self, end_time):
         """
         Simulates the algorithm until all loci have coalesced.
         """
-        while self.ancestors_remain():
-            self.t += 1
+        while not self.is_completed():
             self.verify()
+            if self.t >= end_time:
+                break
+            self.t += 1
+            # print("DTWF", self.t)
             self.dtwf_generation()
 
     def dtwf_generation(self):
@@ -2256,13 +2261,14 @@ def merge_ancestors(self, H, pop_id, label, new_node_id=-1):
                 if r_max not in self.S:
                     j = self.S.floor_key(r_max)
                     self.S[r_max] = self.S[j]
+                min_overlap = len(X) if self.stop_at_local_mrca else 0
                 # Update the number of extant segments.
-                if self.S[left] == len(X):
+                if self.S[left] == min_overlap:
                     self.S[left] = 0
                     right = self.S.succ_key(left)
                 else:
                     right = left
-                    while right < r_max and self.S[right] != len(X):
+                    while right < r_max and self.S[right] != min_overlap:
                         self.S[right] -= len(X) - 1
                         right = self.S.succ_key(right)
                     alpha = self.alloc_segment(left, right, new_node_id, pop_id)
@@ -2394,6 +2400,7 @@ def merge_two_ancestors(self, population_index, label, x, y, u=-1):
         new_lineage = self.alloc_lineage(None, population_index, label=label)
         coalescence = False
         defrag_required = False
+        min_overlap = 2 if self.stop_at_local_mrca else 0
 
         while x is not None or y is not None:
             alpha = None
@@ -2435,12 +2442,12 @@ def merge_two_ancestors(self, population_index, label, x, y, u=-1):
                         j = self.S.floor_key(r_max)
                         self.S[r_max] = self.S[j]
                     # Update the number of extant segments.
-                    if self.S[left] == 2:
+                    if self.S[left] == min_overlap:
                         self.S[left] = 0
                         right = self.S.succ_key(left)
                     else:
                         right = left
-                        while right < r_max and self.S[right] != 2:
+                        while right < r_max and self.S[right] != min_overlap:
                             self.S[right] -= 1
                             right = self.S.succ_key(right)
                         alpha = self.alloc_segment(
@@ -2732,10 +2739,10 @@ def verify(self):
         Checks that the state of the simulator is consistent.
         """
         self.verify_segments()
-        if self.model not in ["fixed_pedigree", "dtwf"]:
-            # The fixed_pedigree model doesn't maintain a bunch of stuff.
-            # It would probably be simpler if it did.
+        # The fixed_pedigree model doesn't maintain a bunch of stuff.
+        if self.model != "fixed_pedigree":
             self.verify_overlaps()
+        if self.model not in ["fixed_pedigree", "dtwf"]:
             for label in range(self.num_labels):
                 if self.recomb_mass_index is None:
                     assert self.recomb_map.total_mass == 0
@@ -2860,6 +2867,7 @@ def run_simulate(args):
         gene_conversion_length=mean_tract_length,
         discrete_genome=args.discrete,
         hull_offset=args.offset,
+        stop_at_local_mrca=not args.continue_after_local_mrca,
     )
     ts = s.simulate(args.end_time)
     ts.dump(args.output_file)
@@ -2946,6 +2954,15 @@ def add_simulator_arguments(parser):
         help="The delta_t value for selective sweeps",
     )
     parser.add_argument("--model", default="hudson")
+    parser.add_argument(
+        "--continue-after-local-mrca",
+        action="store_true",
+        default=False,
+        help=(
+            "If set, continue after local MRCA (i.e., do not stop). "
+            "Default: False (stop at local MRCA)."
+        ),
+    )
     parser.add_argument("--offset", type=float, default=0.0)
     parser.add_argument(
         "--from-ts",

diff --git a/docs/ancestry.md b/docs/ancestry.md
@@ -1129,6 +1129,7 @@ However, if `coalescing_segments_only=False`, then the edges recorded
 would be from `m` to `p` over the entire segment `[a, b)`, and from `n` to `p` over the entire segment `[c,d)`.
 The nodes `m` and `n` coalesce (in `p`) on only the overlapping segment `[c,b)`, and so the node `p` will be a unary node in the flanking regions: above `m` on the segment `[a,c)` and above `n` on the segment `[b,d)`.
 
+
 (sec_additional_nodes_ca)=
 
 ### Common ancestor events
@@ -1480,6 +1481,38 @@ Migrations nodes are also recorded in the ARG using the
 `NodeType.MIGRANT` flag. See the {ref}`sec_api_node_flags`
 section for more details.
 
+(sec_ancestry_stop_at_local_mrca)=
+
+### Simulating ancestral to local MRCAs
+
+By default, msprime stops simulating local trees when a local most recent
+common ancestor (MRCA) is found. This is because events that occur above the
+common ancestor are shared across all samples, and we are usually interested in
+differences between samples. However, it is sometime useful to simulate ancestry
+across the full span of the region until **all** local MRCAs have been reached.
+To do this, we set the parameter `stop_at_local_mrca` of
+{func}`.sim_ancestry` to `False`.
+
+```{code-cell}
+ts = msprime.sim_ancestry(
+    2, recombination_rate=0.1, sequence_length=10, random_seed=75,
+    stop_at_local_mrca=False, record_full_arg=True)
+ts.draw_svg(time_scale='rank')
+```
+
+In this example we've continued the simulation *after* we've found roots for the local
+trees, and we can see, for example, that node 14 is still the ancestor of 11
+in the rightmost tree. This would be not recorded with the default ``stop_at_local_mrca=False``
+because 11 is the root of that tree.
+
+The stopping condition remains the same---we keep simulating until an MRCA has been
+found at *all* local trees. The interpretation of the the final "root" nodes
+across the trees is a little subtle, and follows the same logic as used when
+{ref}`sec_ancestry_end_time`. All of the trees have a root node with time
+equal to the oldest MRCA, and the nodes correspond to different lineages present
+in the simulation.
+
+
 ## Manipulating simulation time
 
 (sec_ancestry_end_time)=
@@ -1493,7 +1526,7 @@ for two diploid samples:
 
 ```{code-cell}
 ts = msprime.sim_ancestry(2, sequence_length=10, recombination_rate=0.1, random_seed=42)
-SVG(ts.draw_svg(y_axis=True, y_ticks=[0, 1, 2, 3, 4, 5]))
+SVG(ts.draw_svg(y_axis=True, y_ticks=[0, 1, 2, 3, 4]))
 ```
 
 Sometimes we would like to stop the simulation early, **before** complete
@@ -2780,6 +2813,39 @@ Because the input pedigree fully describes the simulation many features such as
 combined with other {ref}`ancestry models<sec_ancestry_models>`.
 :::
 
+(sec_ancestry_models_fixed_pedigree_tracing)=
+
+#### Tracing ancestry through a pedigree
+
+The previous example generated trees that are embedded within the pedigree,
+but doesn't tell us directly the exact path that each sample's genetic
+material took through the ancestors. To trace these paths exactly, we can
+use a combination of the ``additional_nodes``,
+``coalescing_segments_only``
+(see {ref}`sec_ancestry_additional_nodes`),
+and ``stop_at_local_mrca`` (see {ref}`sec_ancestry_stop_at_local_mrca`).
+
+```{code-cell}
+ped_ts = msprime.sim_ancestry(
+    initial_state=pedigree,
+    model="fixed_pedigree",
+    random_seed=41,
+    recombination_rate=0.001,
+    coalescing_segments_only=False,
+    additional_nodes=(
+         msprime.NodeType.COMMON_ANCESTOR |
+         msprime.NodeType.PASS_THROUGH),
+    stop_at_local_mrca=True
+)
+node_labels = {node.id: f"{node.individual}({node.id})" for node in ped_ts.nodes()}
+SVG(ped_ts.draw_svg(y_axis=True,  node_labels=node_labels, size=(600,200)))
+```
+
+We can now see that the ancestor of node 15 is 20, which we would have to
+infer in the previous example.
+
+
+
 #### Censoring pedigree information
 
 :::{warning}
@@ -2883,6 +2949,7 @@ when calculating population sizes. Thus, the pedigree must be seen as
 entirely **external** to the population model simulated during recapitation.
 :::
 
+
 (sec_ancestry_models_fixed_pedigree_demography)=
 
 #### Pedigrees and demography