Commit 447b350

Add distribution tests to look for specific error modes
Directly testing that the probability of an event is as expected can be considerably more powerful and can require less memory than performing a Kolmogorov-Smirnov test; however, this approach is much less flexible.
1 parent a6a9f7b commit 447b350

1 file changed: +297 −0 lines changed

distr_test/tests/binary.rs

Lines changed: 297 additions & 0 deletions
// Copyright 2025 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use rand::SeedableRng;
use rand::rngs::SmallRng;
use rand_distr::{Bernoulli, Binomial, Distribution, Geometric};

/// Say `n` samples are drawn independently from a Bernoulli distribution
/// with probability `q` in `[q_lb, q_ub]` of outputting 1. Let X be their sum.
///
/// If `k > q_ub n`, this function returns an estimate (some inaccuracy is
/// possible due to floating-point error) of an upper bound on the
/// log-probability that X >= k; if `k < q_lb n`, it returns an estimate of an
/// upper bound on the log-probability that X <= k. Otherwise (when
/// `q_lb n <= k <= q_ub n`) no nontrivial bound is available and the function
/// returns log-probability 0.0 (probability 1.0).
///
/// Note: the value returned is the logarithm of the probability bound estimate.
fn bernoulli_ln_tail_prob_bound(q_lb: f64, q_ub: f64, k: u64, n: u64) -> f64 {
    fn kl_div_lb(r: f64, p: f64) -> f64 {
        // Note: this calculation can be inaccurate when `r` and `p` are tiny
        if p <= 0.0 {
            if r > p { f64::INFINITY } else { 0.0 }
        } else if 1.0 - p <= 0.0 {
            if r < p { f64::INFINITY } else { 0.0 }
        } else if r == 0.0 {
            (1.0 - r) * f64::ln((1.0 - r) / (1.0 - p))
        } else if 1.0 - r == 0.0 {
            r * f64::ln(r / p)
        } else {
            r * f64::ln(r / p) + (1.0 - r) * f64::ln((1.0 - r) / (1.0 - p))
        }
    }

    assert!(k <= n);
    assert!(0.0 <= q_lb && q_lb <= q_ub && q_ub <= 1.0);

    let r = (k as f64) / (n as f64);
    if r < q_lb {
        // Chernoff bound: Pr[X <= k] <= exp(-n * KL(r || q_lb))
        -(n as f64) * kl_div_lb(r, q_lb)
    } else if r > q_ub {
        // By symmetry of complements: Pr[X >= k] <= exp(-n * KL(1-r || 1-q_ub))
        -(n as f64) * kl_div_lb(1.0 - r, 1.0 - q_ub)
    } else {
        0.0
    }
}

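// An illustrative sanity check of the Chernoff-style bound above (extra test;
// the expected value is computed by hand from the KL formula): for 1000
// fair-coin flips, Pr[X >= 600] <= exp(-1000 * KL(0.6 || 0.5)) ≈ exp(-20.136).
#[test]
fn test_bernoulli_ln_tail_prob_bound_sanity() {
    let bound = bernoulli_ln_tail_prob_bound(0.5, 0.5, 600, 1000);
    assert!((bound - (-20.1355)).abs() < 1e-3);
    // Within [q_lb * n, q_ub * n] only the trivial bound is returned
    assert_eq!(bernoulli_ln_tail_prob_bound(0.4, 0.6, 500, 1000), 0.0);
}
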
/// Given `x = ln(y)`, returns `ln(min(2 * y, 1))`, staying in log space.
fn min_2x_under_ln(x: f64) -> f64 {
    if x.is_nan() {
        x
    } else {
        0.0f64.min(x + f64::ln(2.0))
    }
}

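// Spot checks of the log-space doubling helper above (illustrative extra
// test): y = 0.25 doubles to 0.5, while y = 0.75 saturates at 1.
#[test]
fn test_min_2x_under_ln() {
    assert!((min_2x_under_ln(f64::ln(0.25)) - f64::ln(0.5)).abs() < 1e-12);
    assert_eq!(min_2x_under_ln(f64::ln(0.75)), 0.0);
    assert!(min_2x_under_ln(f64::NAN).is_nan());
}
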
// Log-probability threshold below which a test outcome possibly indicates
// a discrepancy between the actual and the ideal distribution.
// (The event could still happen by chance on a 1e-3 fraction of seeds even
// if the distributions match.)
const POSSIBLE_DISCREPANCY_THRESHOLD: f64 = -3.0 * std::f64::consts::LN_10;

// Log-probability threshold below which a test outcome certainly indicates
// a discrepancy between the actual and the ideal distribution.
// (An event with probability below 1e-40 essentially never happens by
// chance; a hardware failure is many orders of magnitude more likely.)
const CERTAIN_DISCREPANCY_THRESHOLD: f64 = -40.0 * std::f64::consts::LN_10;

#[derive(Debug)]
enum TestFailure {
    #[allow(unused)]
    Possible(f64),
    Certain,
}

/// Draw `sample_size` samples of the boolean event `sample_fn` and check that
/// the observed frequency is consistent with an event probability in
/// `[ideal_prob_lb, ideal_prob_ub]`.
fn test_binary(
    seed: u64,
    ideal_prob_lb: f64,
    ideal_prob_ub: f64,
    sample_size: u64,
    sample_fn: &dyn Fn(&mut SmallRng) -> bool,
) -> Result<(), TestFailure> {
    let mut rng = SmallRng::seed_from_u64(seed);
    let mut ones: u64 = 0;
    for _ in 0..sample_size {
        ones += if sample_fn(&mut rng) { 1 } else { 0 };
    }

    let ln_single_tail_p =
        bernoulli_ln_tail_prob_bound(ideal_prob_lb, ideal_prob_ub, ones, sample_size);
    // Double the probability to correct for the fact that there are two tails
    let ln_p = min_2x_under_ln(ln_single_tail_p);

    if ln_p < CERTAIN_DISCREPANCY_THRESHOLD {
        Err(TestFailure::Certain)
    } else if ln_p < POSSIBLE_DISCREPANCY_THRESHOLD {
        Err(TestFailure::Possible(f64::exp(ln_p)))
    } else {
        Ok(())
    }
}

/// Verify that the re-exported Bernoulli sampler is
/// not clearly far from the correct distribution.
#[test]
fn test_bernoulli() {
    let sample_size = 1_000_000;
    let seed = 0x1;

    // Check that the Bernoulli sampler is not far from correct
    for p_base in [0.0, 1e-9, 1e-3, 1.0 / 3.0, 0.5] {
        for p in [p_base, 1.0 - p_base] {
            // Construct the distribution once, outside the per-sample closure
            let b = Bernoulli::new(p).unwrap();
            test_binary(seed, p, p, sample_size, &|rng| b.sample(rng)).unwrap();
        }
    }

    // Check that the test will actually catch clear discrepancies.
    let b = Bernoulli::new(0.6).unwrap();
    assert!(matches!(
        test_binary(seed, 0.4, 0.4, sample_size, &|rng| b.sample(rng)),
        Err(TestFailure::Certain)
    ));
}

/// For X ~ Binomial(n; p), returns Pr[X mod 2 = 1]
fn binomial_last_bit_probability(n: u64, p: f64) -> f64 {
    /* Since
     *
     * 1 = (p + (1-p))^n = ∑_k \binom{n}{k} p^k (1-p)^{n-k} ,
     *
     * and
     *
     * (-p + (1-p))^n = ∑_k (-1)^k \binom{n}{k} p^k (1-p)^{n-k} ,
     *
     * adding them together gives:
     *
     * 1 + (1 - 2p)^n = ∑_k (1 + (-1)^k) \binom{n}{k} p^k (1-p)^{n-k}
     *                = ∑_k 2 ⋅ 1_{k mod 2 = 0} \binom{n}{k} p^k (1-p)^{n-k}
     *                = 2 Pr[X mod 2 = 0] .
     *
     * So:
     *
     * Pr[X mod 2 = 1] = 1 - ½ (1 + (1 - 2p)^n) = ½ (1 - (1 - 2p)^n)
     */

    // For n > i32::MAX the exponent is clamped; with the moderate values of p
    // used in the tests below, |1 - 2p|^n underflows to 0.0 well before that.
    0.5 * (1.0 - (1.0 - 2.0 * p).powi(n.try_into().unwrap_or(i32::MAX)))
}

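// Cross-check of the parity formula above against direct summation of the
// binomial pmf for one small case (illustrative extra test; n = 10, p = 0.3
// are arbitrary).
#[test]
fn test_binomial_last_bit_probability_formula() {
    let (n, p) = (10u64, 0.3f64);
    // pmf(0) = (1-p)^n; pmf(k+1) = pmf(k) * (n-k)/(k+1) * p/(1-p)
    let mut pmf = (1.0 - p).powi(n as i32);
    let mut odd_mass = 0.0;
    for k in 0..n {
        pmf *= ((n - k) as f64) / ((k + 1) as f64) * p / (1.0 - p);
        if (k + 1) % 2 == 1 {
            odd_mass += pmf;
        }
    }
    assert!((odd_mass - binomial_last_bit_probability(n, p)).abs() < 1e-12);
}
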
/// Do samples from a binomial distribution, taken mod 2, match the expected distribution?
/// (This is a likely failure mode of samplers using floating point.)
#[test]
fn test_binomial_last_bit() {
    let sample_size = 100_000;
    let seed = 0x1;

    let mut sizes = Vec::<u64>::new();
    // Binomial::new(n, p) currently panics when a quantity derived from n is >= i64::MAX
    for i in 1..=62 {
        sizes.push((1 << i) - 1);
        sizes.push(1 << i);
        sizes.push((1 << i) + 1);
        sizes.push(3 * (1 << (i - 1)));
    }

    for s in sizes {
        for p in [0.1, 0.5, 0.9] {
            let t = binomial_last_bit_probability(s, p);

            let Ok(dist) = Binomial::new(s, p) else {
                println!("Failed to create Binomial with n={}, p={}", s, p);
                continue;
            };

            let res = test_binary(seed, t, t, sample_size, &|rng| dist.sample(rng) % 2 == 1);

            // Binomial::new()'s documentation only promises accuracy up to n ≈ 2^53.
            // Using `p` closer to 0 or 1 produces a narrower peak, which is easier
            // to sample correctly.
            if s <= 1 << 53 {
                assert!(res.is_ok());
            } else {
                println!(
                    "Binomial distribution with n={}, p={:.4}, last bit prob {:.4}, log2(n)={:.3}: last bit result {:?}",
                    s,
                    p,
                    t,
                    (s as f64).log2(),
                    res
                );
            }
        }
    }
}

/// For X ~ Geometric(p), returns Pr[X mod 2 = 1]
fn geometric_last_bit_probability(p: f64) -> f64 {
    /* The geometric probabilities are
     *  k:    0     1         2           3
     *  Pr:   p, (1-p)p, (1-p)^2 p, (1-p)^3 p, ...
     *
     * As Pr[X mod 2 = 1] = (1 - p) Pr[X mod 2 = 0],
     * and Pr[X mod 2 = 1] = 1 - Pr[X mod 2 = 0],
     * it follows that Pr[X mod 2 = 0] = 1/(2 - p), and so:
     *
     * Pr[X mod 2 = 1] = 1 - 1/(2 - p) = (1 - p)/(2 - p)
     */
    (1.0 - p) / (2.0 - p)
}

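// Cross-check of the closed form above by summing the geometric series over
// odd k directly (illustrative extra test; p = 0.3 is arbitrary, and 200
// terms are far more than needed for convergence).
#[test]
fn test_geometric_last_bit_probability_formula() {
    let p = 0.3f64;
    let mut odd_mass = 0.0;
    let mut term = (1.0 - p) * p; // Pr[X = 1]
    for _ in 0..200 {
        odd_mass += term;
        term *= (1.0 - p) * (1.0 - p); // advance from Pr[X = k] to Pr[X = k + 2]
    }
    assert!((odd_mass - geometric_last_bit_probability(p)).abs() < 1e-12);
}
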
/// Do samples from a geometric distribution, taken mod 2, match the expected distribution?
#[test]
fn test_geometric_last_bit() {
    let sample_size = 100_000;
    let seed = 0x1;

    // Test on distributions with values of p at varying closeness to 0.0
    for i in 0..=128 {
        let p = 0.5_f64.powf(i as f64 / 2.0);

        if 1.0 - p == 1.0 {
            // The current implementation of Geometric always returns u64::MAX when 1.0 - p = 1.0
            continue;
        }

        let t = geometric_last_bit_probability(p);
        // (1 - p)^(2^64): the probability mass at values >= 2^64, which sampling
        // into a u64 may clip; widen the accepted interval by this amount.
        let clipped_prob = (special::Primitive::ln_1p(-p) * 2.0f64.powi(64)).exp();

        let Ok(dist) = Geometric::new(p) else {
            println!("Failed to create Geometric with p={}", p);
            continue;
        };

        let res = test_binary(
            seed,
            t - clipped_prob,
            t + clipped_prob,
            sample_size,
            &|rng| dist.sample(rng) % 2 == 1,
        );

        println!(
            "Geometric distribution with p={:e}, last bit prob {}-{}: last bit result {:?}",
            p,
            t - clipped_prob,
            t + clipped_prob,
            res
        );

        assert!(res.is_ok(), "{:?}", res);
    }
}

/// How accurate are the probabilities for outputting 0 and n for Binomial(n, p)?
///
/// Careless boundary handling can make the endpoint probabilities wrong by a
/// constant factor, although the tails have low enough probability that this
/// can be hard to detect.
#[test]
#[ignore]
fn test_binomial_endpoints() {
    let p = 0.5;
    // Note: conclusively indicating an error tends to require more than
    // 1 / (event probability) samples, so this test needs ~100M samples
    // to find any issues below at n >= 20.
    let sample_size = 400_000_000;
    let seed = 0x1;

    // With the current implementation, n <= 20 always uses BINV or Poisson sampling,
    // so to check the BTPE implementation a larger `s` is needed.
    for s in 1..25u64 {
        let Ok(dist) = Binomial::new(s, p) else {
            println!("Failed to create Binomial with n={}, p={}", s, p);
            continue;
        };

        // Pr[X = 0] + Pr[X = n] = (1-p)^n + p^n
        let t = p.powi(s as i32) + (1.0 - p).powi(s as i32);
        let res = test_binary(seed, t, t, sample_size, &|rng| {
            let v = dist.sample(rng);
            v == 0 || v == s
        });
        println!(
            "Binomial({}, {}) endpoint test, evt prob {:e}, {:?}",
            s, p, t, res
        );
        assert!(res.is_ok());
    }
}
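
// Worked example of the endpoint probability used above (illustrative extra
// test): for Binomial(4, 0.3), Pr[X = 0] = 0.7^4 = 0.2401 and
// Pr[X = 4] = 0.3^4 = 0.0081, so the combined endpoint probability is 0.2482.
#[test]
fn test_binomial_endpoint_probability_example() {
    let (n, p) = (4i32, 0.3f64);
    let t = p.powi(n) + (1.0 - p).powi(n);
    assert!((t - 0.2482).abs() < 1e-12);
}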
