Commit c85b82a

Add distribution tests to look for specific error modes
Directly testing that the probability of an event is as expected can be considerably more powerful and can require less memory than performing a Kolmogorov-Smirnov test; however, this approach is much less flexible.
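
The direct tests below rely on the Chernoff–Hoeffding bound: if X is the sum of n independent Bernoulli(q) samples and r = k/n >= q, then

    \Pr[X \ge k] \;\le\; \exp\bigl(-n \, D(r \,\|\, q)\bigr), \qquad
    D(r \,\|\, q) \;=\; r \ln\frac{r}{q} + (1 - r) \ln\frac{1 - r}{1 - q},

and symmetrically for the lower tail. `bernoulli_ln_tail_prob_bound` in the diff computes the exponent -n D(r ‖ q) directly in log space.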
1 parent a6a9f7b commit c85b82a

1 file changed: 295 additions, 0 deletions

distr_test/tests/binary.rs

@@ -0,0 +1,295 @@
// Copyright 2025 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use rand::SeedableRng;
use rand::rngs::SmallRng;
use rand_distr::{Bernoulli, Binomial, Distribution, Geometric};

/// Say `n` samples are drawn independently from a Bernoulli distribution
/// with probability `q` in `[q_lb, q_ub]` of outputting 1. Let X be their sum.
///
/// If `k > q_ub n`, this function returns an estimate (some inaccuracy is possible
/// due to floating-point error) of an upper bound on the log-probability that X >= k;
/// if `k < q_lb n`, it returns an estimate of an upper bound on the
/// log-probability that X <= k. Otherwise `k` may equal `q n`, and the
/// function returns log-probability 0.0 (probability 1.0).
///
/// Note: the value returned is the natural logarithm of the probability bound estimate.
fn bernoulli_ln_tail_prob_bound(q_lb: f64, q_ub: f64, k: u64, n: u64) -> f64 {
    fn kl_div_lb(r: f64, p: f64) -> f64 {
        // Note: this calculation can be inaccurate when r, p are tiny
        if p <= 0.0 {
            if r > p { f64::INFINITY } else { 0.0 }
        } else if 1.0 - p <= 0.0 {
            if r < p { f64::INFINITY } else { 0.0 }
        } else if r == 0.0 {
            (1.0 - r) * f64::ln((1.0 - r) / (1.0 - p))
        } else if 1.0 - r == 0.0 {
            r * f64::ln(r / p)
        } else {
            r * f64::ln(r / p) + (1.0 - r) * f64::ln((1.0 - r) / (1.0 - p))
        }
    }

    assert!(k <= n);
    assert!(0.0 <= q_lb && q_ub <= 1.0);

    let r = (k as f64) / (n as f64);
    if r < q_lb {
        -(n as f64) * kl_div_lb(r, q_lb)
    } else if r > q_ub {
        -(n as f64) * kl_div_lb(1.0 - r, 1.0 - q_ub)
    } else {
        0.0
    }
}

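// Example: bernoulli_ln_tail_prob_bound(0.5, 0.5, 502_000, 1_000_000) computes
// r = 0.502 and KL(0.502 ‖ 0.5) ≈ 8.0e-6, returning about -1e6 * 8.0e-6 = -8.0,
// i.e. Pr[X >= 502_000] <= e^-8 ≈ 3.4e-4 for a fair coin.
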
/// For y = e^x and z = min(2 * y, 1), return ln(z).
fn min_2x_under_ln(x: f64) -> f64 {
    if x.is_nan() {
        x
    } else {
        0.0f64.min(x + f64::ln(2.0))
    }
}

// Threshold log-probability below which a test outcome possibly indicates
// a discrepancy between the actual and ideal distribution.
// (The event could happen by chance on a 1e-3 fraction of seeds even
// if the distributions match.)
const POSSIBLE_DISCREPANCY_THRESHOLD: f64 = -3.0 * std::f64::consts::LN_10;

// Threshold log-probability below which a test outcome certainly indicates
// a discrepancy between the actual and ideal distribution.
// (At this level, a hardware failure is many orders of magnitude more likely
// than such an extreme outcome arising by chance from a correct implementation.)
const CERTAIN_DISCREPANCY_THRESHOLD: f64 = -40.0 * std::f64::consts::LN_10;

#[derive(Debug)]
enum TestFailure {
    #[allow(unused)]
    Possible(f64),
    Certain,
}

fn test_binary(
    seed: u64,
    ideal_prob_lb: f64,
    ideal_prob_ub: f64,
    sample_size: u64,
    sample_fn: &dyn Fn(&mut SmallRng) -> bool,
) -> Result<(), TestFailure> {
    let mut rng = SmallRng::seed_from_u64(seed);
    let mut ones: u64 = 0;
    for _ in 0..sample_size {
        ones += if sample_fn(&mut rng) { 1 } else { 0 };
    }

    let ln_single_tail_p =
        bernoulli_ln_tail_prob_bound(ideal_prob_lb, ideal_prob_ub, ones, sample_size);
    // Double the probability to correct for the fact that there are two tails
    let ln_p = min_2x_under_ln(ln_single_tail_p);

    if ln_p < CERTAIN_DISCREPANCY_THRESHOLD {
        Err(TestFailure::Certain)
    } else if ln_p < POSSIBLE_DISCREPANCY_THRESHOLD {
        Err(TestFailure::Possible(f64::exp(ln_p)))
    } else {
        Ok(())
    }
}

/// Verify that the re-exported Bernoulli sampler is
/// not clearly far from the correct distribution
#[test]
fn test_bernoulli() {
    let sample_size = 1_000_000;
    let seed = 0x1;

    // Check that the Bernoulli sampler is not far from correct
    for p_base in [0.0, 1e-9, 1e-3, 1.0 / 3.0, 0.5] {
        for p in [p_base, 1.0 - p_base] {
            test_binary(seed, p, p, sample_size, &|rng| {
                let b = Bernoulli::new(p).unwrap();
                b.sample(rng)
            })
            .unwrap();
        }
    }

    // Check that the test will actually catch clear discrepancies.
    assert!(matches!(
        test_binary(seed, 0.4, 0.4, sample_size, &|rng| {
            let b = Bernoulli::new(0.6).unwrap();
            b.sample(rng)
        }),
        Err(TestFailure::Certain)
    ));
}

/// For X ~ Binomial(n; p), returns Pr[X mod 2 = 1]
fn binomial_last_bit_probability(n: u64, p: f64) -> f64 {
    /* Since
     *
     *   1 = (p + (1-p))^n = ∑_k \binom{n}{k} p^k (1-p)^{n-k} ,
     *
     * and
     *
     *   (-p + (1-p))^n = ∑_k (-1)^k \binom{n}{k} p^k (1-p)^{n-k} ,
     *
     * adding them together gives:
     *
     *   1 + (1 - 2p)^n = ∑_k (1 + (-1)^k) \binom{n}{k} p^k (1-p)^{n-k}
     *                  = ∑_k 2 ⋅ 1_{k mod 2 = 0} \binom{n}{k} p^k (1-p)^{n-k}
     *                  = 2 Pr[k mod 2 = 0] .
     *
     * So:
     *
     *   Pr[k mod 2 = 1] = 1 - ½ (1 + (1 - 2p)^n) = ½ (1 - (1 - 2p)^n)
     */

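    // For n > i32::MAX the exponent is clamped, but |1 - 2p|^n has already
    // underflowed to 0.0 at far smaller n for values of p away from 0 and 1
    // (such as those tested below), so the clamping does not change the result.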
    0.5 * (1.0 - (1.0 - 2.0 * p).powi(n.try_into().unwrap_or(i32::MAX)))
}
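// Quick check: with n = 1 the formula gives 0.5 * (1 - (1 - 2p)) = p,
// matching Pr[X = 1] = p directly.
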
/// Do samples from a binomial distribution, taken mod 2, match the expected distribution?
/// (This is a likely failure mode of samplers using floating point.)
#[test]
fn test_binomial_last_bit() {
    let sample_size = 100_000;
    let seed = 0x1;

    let mut sizes = Vec::<u64>::new();
    // Binomial::new(n, p) currently panics when a quantity derived from n is >= i64::MAX
    for i in 1..=62 {
        sizes.push((1 << i) - 1);
        sizes.push(1 << i);
        sizes.push((1 << i) + 1);
        sizes.push(3 * (1 << (i - 1)));
    }

    for s in sizes {
        for p in [0.1, 0.5, 0.9] {
            let t = binomial_last_bit_probability(s, p);

            let Ok(dist) = Binomial::new(s, p) else {
                println!("Failed to create Binomial with n={}, p={}", s, p);
                continue;
            };

            let res = test_binary(seed, t, t, sample_size, &|rng| dist.sample(rng) % 2 == 1);

            // Binomial::new()'s documentation only promises accuracy up to n ≈ 2^53.
            // Using `p` closer to 0 or 1 produces a narrower peak, which is easier to sample correctly.
            if s <= 1 << 53 {
                assert!(res.is_ok());
            } else {
                println!(
                    "Binomial distribution with n={}, p={:.4}, last bit prob {:.4}, log2(n)={:.3}: last bit result {:?}",
                    s,
                    p,
                    t,
                    (s as f64).log2(),
                    res
                );
            }
        }
    }
}

/// For X ~ Geometric(p), returns Pr[X mod 2 = 1]
fn geometric_last_bit_probability(p: f64) -> f64 {
    /* The geometric probabilities for outcomes 0, 1, 2, 3, ... are
     *
     *   p, (1-p)p, (1-p)^2 p, (1-p)^3 p, ...
     *
     * Since Pr[X mod 2 = 1] = (1 - p) Pr[X mod 2 = 0]
     * and Pr[X mod 2 = 1] = 1 - Pr[X mod 2 = 0],
     * solving for Pr[X mod 2 = 0] gives 1/(2 - p), and hence:
     *
     *   Pr[X mod 2 = 1] = 1 - 1/(2 - p) = (1 - p)/(2 - p)
     */
    (1.0 - p) / (2.0 - p)
}
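// Quick check: p = 0.5 gives (1 - 0.5)/(2 - 0.5) = 1/3, matching the sum of the
// odd-outcome masses 0.25 + 0.25^2 + ... = 0.25 / (1 - 0.25) = 1/3.
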
#[test]
fn test_geometric_last_bit() {
    let sample_size = 100_000;
    let seed = 0x1;

    // Test distributions with values of p at varying distances from 0.0
    for i in 0..=128 {
        let p = 0.5_f64.powf(i as f64 / 2.0);

        if 1.0 - p == 1.0 {
            // The current implementation of Geometric always returns u64::MAX when 1.0 - p = 1.0
            continue;
        }

        let t = geometric_last_bit_probability(p);
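        // Pr[X >= 2^64] = (1 - p)^(2^64) is the tail mass that a u64-valued
        // sampler cannot represent; the acceptable band below is widened by
        // this amount in each direction to allow for however it is handled.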
        let clipped_prob = (special::Primitive::ln_1p(-p) * 2.0f64.powi(64)).exp();

        let Ok(dist) = Geometric::new(p) else {
            println!("Failed to create Geometric with p={}", p);
            continue;
        };

        let res = test_binary(
            seed,
            t - clipped_prob,
            t + clipped_prob,
            sample_size,
            &|rng| dist.sample(rng) % 2 == 1,
        );

        println!(
            "Geometric distribution with p={:e}, last bit prob {}-{}: last bit result {:?}",
            p,
            t - clipped_prob,
            t + clipped_prob,
            res
        );

        assert!(res.is_ok(), "{:?}", res);
    }
}

/// How accurate are the probabilities of outputting 0 and n for Binomial(n, p)?
///
/// Careless boundary handling can make the endpoint probabilities wrong by a
/// constant factor, although the tails have low enough probability that this
/// can be hard to detect.
#[test]
#[ignore]
fn test_binomial_endpoints() {
    let p = 0.5;
    // Note: conclusively indicating an error tends to require more than 1 / (event probability)
    // samples, so this test needs ~100M samples to find any issues in the cases below at n >= 20.
    let sample_size = 400_000_000;
    let seed = 0x1;

    // With the current implementation, n <= 20 always uses BINV or Poisson sampling, so to check
    // the BTPE implementation larger `s` is needed.

    for s in 1..25u64 {
        let Ok(dist) = Binomial::new(s, p) else {
            println!("Failed to create Binomial with n={}, p={}", s, p);
            continue;
        };

        let t = p.powi(s as i32) + (1.0 - p).powi(s as i32);
        let res = test_binary(seed, t, t, sample_size, &|rng| {
            let v = dist.sample(rng);
            v == 0 || v == s
        });
        println!(
            "Binomial({}, {}) endpoint test, evt prob {:e}, {:?}",
            s, p, t, res
        );
        assert!(res.is_ok());
    }
}
