diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py
index e60f5d370..d9d3ab46f 100644
--- a/benchmarks/benchmarks.py
+++ b/benchmarks/benchmarks.py
@@ -24,6 +24,7 @@ documentation for details on how to run these and how to develop your own
 benchmarks.
 """
 
+
 try:
     import stdpopsim
 
@@ -55,148 +56,167 @@ def setup(self):
         self.recomb_map_chr22 = genetic_map.get_chromosome_map("chr22")
 
 
-class Hudson(LargeSimulationBenchmark):
-    def _run_large_sample_size(self):
-        msprime.simulate(
-            sample_size=10**6,
-            length=1e7,
-            Ne=10**4,
-            recombination_rate=1e-8,
-            random_seed=42,
-        )
-
-    def time_large_sample_size(self):
-        self._run_large_sample_size()
-
-    def peakmem_large_sample_size(self):
-        self._run_large_sample_size()
-
-    def _run_long_sequence_length(self):
-        msprime.simulate(
-            sample_size=100,
-            length=1e8,
-            Ne=10**4,
-            recombination_rate=1e-8,
-            random_seed=42,
-        )
-
-    def time_long_sequence_length(self):
-        self._run_long_sequence_length()
-
-    def peakmem_long_sequence_length(self):
-        self._run_long_sequence_length()
-
-    def _run_long_sequence_length_gene_conversion(self):
-        msprime.sim_ancestry(
-            sample_size=100,
-            length=1e8,
-            Ne=10**4,
-            gene_conversion_rate=1e-8,
-            # 100Kb tract length.
-            gene_conversion_tract_length=100 * 1e3,
-            random_seed=43,
-        )
-
-    def time_long_sequence_length_gene_conversion(self):
-        self._run_long_sequence_length()
-
-    def peakmem_long_sequence_length_gene_conversion(self):
-        self._run_long_sequence_length()
-
-    def _run_human_chr22(self):
-        msprime.simulate(
-            sample_size=100,
-            Ne=10**4,
-            recombination_map=self.recomb_map_chr22,
-            random_seed=234,
-        )
-
-    def time_human_chr22(self):
-        self._run_human_chr22()
-
-    def peakmem_human_chr22(self):
-        self._run_human_chr22()
-
-    def _run_many_replicates(self):
-        for _ in msprime.simulate(10, num_replicates=10**5, random_seed=1234):
-            pass
+class HudsonlargeSampleSize(LargeSimulationBenchmark):
+    """Hudson model benchmark with a very large sample."""
+
+    def _get_params(self):
+        """Return the keyword arguments for ``msprime.sim_ancestry``."""
+        return {
+            # Half of the former sample_size=10**6, as an int, not a float.
+            "samples": 10**6 // 2,
+            "sequence_length": 1e7,
+            "population_size": 10**4,
+            "recombination_rate": 1e-8,
+            "random_seed": 42,
+        }
+
+    def run(self):
+        """Run one simulation using the parameters from ``_get_params``."""
+        return msprime.sim_ancestry(**self._get_params())
+
+    def time_test(self):
+        self.run()
+
+    def peakmem_test(self):
+        self.run()
+
+
+class HudsonlargeSampleSizeOverRoot(HudsonlargeSampleSize):
+    """Large-sample Hudson benchmark with ``stop_at_local_mrca=False``."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "stop_at_local_mrca": False,
+        }
+
 
-    def time_many_replicates(self):
-        self._run_many_replicates()
-
-    def peakmem_many_replicates(self):
-        self._run_many_replicates()
-
-    # 2 populations, high migration.
-    # Lots of populations, 1D stepping stone.
-
-
-class DTWF(LargeSimulationBenchmark):
-    def _run_large_population_size(self):
-        msprime.simulate(
-            sample_size=1000,
-            Ne=10**6,
-            length=1e5,
-            recombination_rate=1e-8,
-            random_seed=42,
-            model="dtwf",
-            end_time=1000,
-        )
-
-    def time_large_population_size(self):
-        self._run_large_population_size()
-
-    def peakmem_large_population_size(self):
-        self._run_large_population_size()
-
-    def _run_long_sequence_length(self):
-        msprime.simulate(
-            sample_size=100,
-            Ne=10**4,
-            length=1e7,
-            recombination_rate=1e-8,
-            random_seed=42,
-            model="dtwf",
-            # Tuning this to give ~30s runtime.
-            end_time=5e4,
-        )
-
-    def time_long_sequence_length(self):
-        self._run_long_sequence_length()
-
-    def peakmem_long_sequence_length(self):
-        self._run_long_sequence_length()
-
-    def _run_human_chr22(self):
-        msprime.simulate(
-            sample_size=100,
-            Ne=10**4,
-            recombination_map=self.recomb_map_chr22,
-            random_seed=234,
-            end_time=10000,
-            model="dtwf",
-        )
-
-    def time_human_chr22(self):
-        self._run_human_chr22()
-
-    def peakmem_human_chr22(self):
-        self._run_human_chr22()
-
-    def _run_many_replicates(self):
-        reps = msprime.simulate(
-            10,
-            Ne=100,
-            num_replicates=10**5,
-            random_seed=1234,
-            model="dtwf",
-            end_time=100,
-        )
-        for _ in reps:
+class HudsonLongSequenceLength(HudsonlargeSampleSize):
+    """Hudson model benchmark with a long sequence and a small sample."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "sequence_length": 1e8,
+            "samples": 50,
+        }
+
+
+class HudsonLongSequenceLengthGeneConversion(HudsonlargeSampleSize):
+    """Long-sequence Hudson benchmark driven by gene conversion only."""
+
+    def _get_params(self):
+        return {
+            "sequence_length": 1e8,
+            "samples": 50,
+            # Keep the Ne=10**4 of the benchmark this class replaces.
+            "population_size": 10**4,
+            "gene_conversion_rate": 1e-8,
+            # 100Kb tract length.
+            "gene_conversion_tract_length": 100 * 1e3,
+            "random_seed": 43,
+        }
+
+
+class HudsonHumanChr22(HudsonlargeSampleSize):
+    """Hudson model benchmark using the chr22 recombination map."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "sequence_length": None,
+            "samples": 50,
+            "recombination_rate": self.recomb_map_chr22,
+        }
+
+
+class HudsonManyReplicates(HudsonlargeSampleSize):
+    """Hudson model benchmark iterating over many small replicates."""
+
+    def run(self):
+        # Half of the former sample_size=10, like every other converted benchmark.
+        params = {"samples": 5, "num_replicates": 10**5, "random_seed": 1234}
+        for _ in msprime.sim_ancestry(**params):
             pass
 
-    def time_many_replicates(self):
-        self._run_many_replicates()
 
-    def peakmem_many_replicates(self):
-        self._run_many_replicates()
+class HudsonHumanChr22OverRoot(HudsonlargeSampleSize):
+    """chr22 Hudson benchmark with ``stop_at_local_mrca=False``."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "sequence_length": None,
+            "samples": 50,
+            "recombination_rate": self.recomb_map_chr22,
+            "stop_at_local_mrca": False,
+        }
+
+
+class DTWFLargePopulationSize(LargeSimulationBenchmark):
+    """DTWF model benchmark with a large population size."""
+
+    def _get_params(self):
+        """Return the keyword arguments for ``msprime.sim_ancestry``."""
+        return {
+            "samples": 500,
+            "sequence_length": 1e5,
+            "population_size": 10**6,
+            "recombination_rate": 1e-8,
+            "random_seed": 42,
+            "model": "dtwf",
+            "end_time": 1000,
+        }
+
+    def run(self):
+        """Run one simulation using the parameters from ``_get_params``."""
+        return msprime.sim_ancestry(**self._get_params())
+
+    def time_test(self):
+        self.run()
+
+    def peakmem_test(self):
+        self.run()
+
+
+class DTWFLongSequenceLength(DTWFLargePopulationSize):
+    """DTWF model benchmark with a longer sequence and a small sample."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "sequence_length": 1e7,
+            "samples": 50,
+            "end_time": 5e4,
+            "population_size": 10**4,
+        }
+
+
+class DTWFHumanChr22(DTWFLargePopulationSize):
+    """DTWF model benchmark using the chr22 recombination map."""
+
+    def _get_params(self):
+        return {
+            **super()._get_params(),
+            "sequence_length": None,
+            "samples": 50,
+            "recombination_rate": self.recomb_map_chr22,
+            "end_time": 10000,
+            "population_size": 10**4,
+        }
+
+
+class DTWFManyReplicates(DTWFLargePopulationSize):
+    """DTWF model benchmark iterating over many small replicates."""
+
+    def run(self):
+        params = {
+            "samples": 5,
+            "population_size": 100,
+            "num_replicates": 10**5,
+            "random_seed": 1234,
+            "model": "dtwf",
+            "end_time": 100,
+        }
+        for _ in msprime.sim_ancestry(**params):
+            pass
diff --git a/benchmarks/check_asv.sh b/benchmarks/check_asv.sh
index 89a738be1..95176d889 100755
--- a/benchmarks/check_asv.sh
+++ b/benchmarks/check_asv.sh
@@ -4,6 +4,10 @@ sudo cpufreq-set -c 3 -g performance
 #asv does recent commits first, so by letting it run for 55min, and the cron to 1hr
 #it will always keep up with new commits, but also process the backlog
 timeout 3300s asv run -j 4 --show-stderr --cpu-affinity 3 --skip-existing ALL
+
+#Alternatively, benchmark a fixed set of versions: comment out the `timeout ... asv run` line
+#above (line 6) and uncomment the `asv run` line below. Add more versions to version_hashes.txt.
+#asv run -j 4 --show-stderr --cpu-affinity 3 --skip-existing HASHFILE:benchmarks/version_hashes.txt
 sudo cpufreq-set -c 3 -g powersave
 asv publish
 asv preview
diff --git a/benchmarks/version_hashes.txt b/benchmarks/version_hashes.txt
index b6faff384..2f0ca5fd7 100644
--- a/benchmarks/version_hashes.txt
+++ b/benchmarks/version_hashes.txt
@@ -1,6 +1,10 @@
-661200e8fc310d374eb266cad39d4d3d96f0b8e9
-8ce627f754ee52d7961fd9016fcbed202234734e
-8a39c401d1c817f4950b7fc486da8238b9fa76ec
-3e28f9dea511e6745f99cbf2c009cdcea4a73f20
-b8f5fb60b0ca0885b7f1c2d7ac7a9ae3b95fc543
-18d3c417db2cdd87ad6539a5e2cef60250460b3a
+d2e99ab2d4ba828bc1f19b0eb23f0334c1b26f1a
+cda89ed3f670c428d0452607753dccd8fc79f5ff
+2ddc65cc1e83c6d3c2af78e781bd5983a0364ff7
+0da0150a2d65053933c2c5f9b598e9197c7ea9ac
+ad3f625715821a3133d841a70d2a782d7e8fd171
+ff3f7851b647f28640cc2cbca62cfd056508e2d6
+804e0361c4f8b5f5051a9fbf411054ee8be3426a
+c11a2f12c72dd054a0ade0767474184ceec8281c
+f6fc608975bce01070f23e785d89fe171d190a9a
+57ef4ee3267cd9b8e711787539007b0cde94c55c