Skip to content

Commit f3e444a

Browse files
committed
feat: panic and sentry hook for foundations
1 parent 15298d0 commit f3e444a

File tree

13 files changed

+848
-2
lines changed

13 files changed

+848
-2
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,3 +320,6 @@ jobs:
320320
- name: Run foundations tests
321321
run: _RJEM_MALLOC_CONF=prof:true cargo nextest run -p foundations --target ${{ matrix.target }}
322322
shell: bash
323+
- name: Run panic_hook tests with no default features
324+
run: _RJEM_MALLOC_CONF=prof:true cargo nextest run -p foundations --test panic_hook --no-default-features --target ${{ matrix.target }}
325+
shell: bash

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ tower-service = "0.3"
9191
tracing-slog = "0.3.0"
9292
tracing-subscriber = "0.3"
9393
yaml-merge-keys = { version = "0.5", features = ["serde_yaml"] }
94+
sentry-core = "0.36"
95+
sentry = "0.36"
9496

9597
# needed for minver
9698
async-stream = "0.3"

foundations/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,12 @@ platform-common-default = [
4141
"testing",
4242
"settings_deny_unknown_fields_by_default",
4343
"panic_on_too_much_logger_nesting",
44+
"sentry",
4445
]
4546

47+
# Sentry integration for fatal error tracking
48+
sentry = ["dep:sentry-core"]
49+
4650
# A subset of features that can be used both on server and client sides. Useful for libraries
4751
# that can be used either way.
4852
server-client-common-default = ["settings", "client-telemetry", "testing"]
@@ -231,6 +235,7 @@ tikv-jemallocator = { workspace = true, optional = true, features = [
231235
yaml-merge-keys = { workspace = true, optional = true, features = [
232236
"serde_yaml",
233237
] }
238+
sentry-core = { workspace = true, optional = true }
234239

235240
# needed for minver purposes
236241
async-stream = { workspace = true, optional = true }
@@ -261,6 +266,7 @@ tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
261266
ipnetwork = { workspace = true }
262267
nix = { workspace = true , features = ["fs"] }
263268
tracing-subscriber = { workspace = true }
269+
sentry = { workspace = true }
264270

265271
[build-dependencies]
266272
bindgen = { workspace = true, features = ["runtime"], optional = true }

foundations/src/alerts/metrics.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
//! Panic and sentry event related metrics.
2+
3+
use crate::telemetry::metrics::Counter;
4+
5+
/// Panic metrics.
// The `total` counter declared here is the one the default registry's
// `inc_panics_total` increments; the `crate::alerts` module docs refer to it
// as the `panics_total` metric.
6+
#[crate::telemetry::metrics::metrics(crate_path = "crate", unprefixed)]
7+
pub mod panics {
8+
/// Total number of panics observed.
9+
pub fn total() -> Counter;
10+
}
11+
12+
/// Sentry metrics.
// Only compiled when the `sentry` feature is enabled. The `total` counter is
// the one the default registry's `inc_sentry_events_total` increments; the
// `crate::alerts` module docs refer to it as `sentry_events_total`.
13+
#[cfg(feature = "sentry")]
14+
#[crate::telemetry::metrics::metrics(crate_path = "crate", unprefixed)]
15+
pub mod sentry_events {
16+
/// Total number of sentry events observed.
17+
pub fn total() -> Counter;
18+
}

foundations/src/alerts/mod.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#![allow(clippy::needless_doctest_main)]
2+
//! Fatal error tracking for panics and sentry events.
3+
//!
4+
//! This module provides unified tracking of "fatal errors", which are events that
5+
//! warrant human investigation.
6+
//!
7+
//! It includes:
8+
//! - A panic hook that increments the `panics_total` metric and logs the panic
9+
//! - A sentry hook that increments `sentry_events_total` metric (_requires the `sentry` feature_)
10+
//!
11+
//! If a previous panic or sentry hook exists, it will be executed after the
12+
//! installed foundations hook.
13+
//!
14+
//! This does not require the `metrics` feature to be enabled. If foundations
15+
//! users do not enable it, then a [`FatalErrorRegistry`] must be provided.
16+
//!
17+
//! # Usage
18+
//!
19+
//! Users of [`crate::telemetry::init()`] have the panic hook automatically
20+
//! installed. However, the sentry hook still needs to be installed.
21+
//!
22+
//! To manually install the hooks with the `metrics` feature enabled:
23+
//!
24+
//! ```rust
25+
//! fn main() {
26+
//! foundations::alerts::panic_hook().init();
27+
//!
28+
//! let mut client_opts = sentry_core::ClientOptions::default();
29+
//! foundations::alerts::sentry_hook().install(&mut client_opts);
30+
//! // sentry::init(client_opts);
31+
//! }
32+
//! ```
33+
//!
34+
//! Without the `metrics` feature, you must provide a custom registry:
35+
//!
36+
//! ```rust,ignore
37+
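//! // `Copy` so the same registry value can be handed to both hooks below.
//! #[derive(Clone, Copy)]
//! struct MyRegistry;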
38+
//!
39+
//! fn main() {
40+
//! let registry = MyRegistry;
41+
//!
42+
//! foundations::alerts::panic_hook()
43+
//! .with_registry(registry)
44+
//! .init();
45+
46+
//! let mut client_opts = sentry_core::ClientOptions::default();
47+
//! foundations::alerts::sentry_hook()
48+
//! .with_registry(registry)
49+
//! .install(&mut client_opts);
50+
//! // sentry::init(client_opts);
51+
//! }
52+
//! ```
53+
54+
#[cfg(feature = "metrics")]
55+
pub mod metrics;
56+
mod panic;
57+
#[cfg(feature = "sentry")]
58+
mod sentry;
59+
60+
use std::sync::OnceLock;
61+
62+
pub(crate) static HOOK_INSTALLED: OnceLock<()> = OnceLock::new();
63+
64+
pub use self::panic::{panic_hook, PanicHookBuilder};
65+
66+
#[cfg(feature = "sentry")]
67+
pub use self::sentry::{sentry_hook, SentryHookBuilder};
68+
69+
/// Trait for recording sentry and panic hook metrics.
70+
///
71+
/// Implement this trait to use a custom metrics registry instead of
72+
/// `foundations::telemetry::metrics`.
///
/// Implementors must be `Send + Sync`. The panic hook and the sentry hook in
/// this module each call the corresponding method with `by == 1`, once per
/// panic and once per event passed to `before_send` respectively.
73+
pub trait FatalErrorRegistry: Send + Sync {
74+
/// Increment the panics counter.
///
/// `by` is the amount to add to the counter.
75+
fn inc_panics_total(&self, by: u64);
76+
77+
/// Increment the sentry events counter.
///
/// `by` is the amount to add to the counter.
78+
fn inc_sentry_events_total(&self, by: u64);
79+
}
80+
81+
#[doc(hidden)]
pub mod _private {
    // Typestate markers and the default registry used by the hook builders.
    // The module is hidden from the docs: these types only have to be
    // nameable in the builders' public signatures.

    /// The default registry implementation using foundations metrics.
    #[cfg(feature = "metrics")]
    pub struct DefaultRegistry {
        pub(crate) _private: (),
    }

    #[cfg(feature = "metrics")]
    impl super::FatalErrorRegistry for DefaultRegistry {
        fn inc_panics_total(&self, by: u64) {
            super::metrics::panics::total().inc_by(by);
        }

        fn inc_sentry_events_total(&self, by: u64) {
            // `metrics::sentry_events` is only compiled with the `sentry`
            // feature, so the call must be gated as well; otherwise a build
            // with `metrics` but without `sentry` does not compile.
            #[cfg(feature = "sentry")]
            super::metrics::sentry_events::total().inc_by(by);

            // Without `sentry` there is no counter to bump: deliberate no-op.
            #[cfg(not(feature = "sentry"))]
            let _ = by;
        }
    }

    /// Builder state: no registry has been supplied yet.
    #[derive(Default)]
    pub struct NeedsRegistry {
        pub(crate) _private: (),
    }

    /// Builder state: a registry of type `R` is available.
    pub struct HasRegistry<R> {
        pub(crate) registry: R,
    }

    #[cfg(feature = "metrics")]
    impl Default for HasRegistry<DefaultRegistry> {
        fn default() -> Self {
            Self {
                registry: DefaultRegistry { _private: () },
            }
        }
    }

    /// Initial builder state: with `metrics` the default registry is already
    /// in place.
    #[cfg(feature = "metrics")]
    pub type DefaultBuilderState = HasRegistry<DefaultRegistry>;

    /// Initial builder state: without `metrics` a registry must be supplied.
    #[cfg(not(feature = "metrics"))]
    pub type DefaultBuilderState = NeedsRegistry;
}

foundations/src/alerts/panic.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
//! Panic hook implementation for tracking panics.
2+
3+
#[cfg(feature = "metrics")]
4+
use crate::alerts::_private::DefaultRegistry;
5+
6+
use super::_private::{DefaultBuilderState, HasRegistry, NeedsRegistry};
7+
use super::{FatalErrorRegistry, HOOK_INSTALLED};
8+
use std::panic::{self, PanicHookInfo};
9+
10+
/// Returns a builder for configuring and installing the panic hook.
11+
///
12+
/// When the `metrics` feature is enabled, a default registry is provided and
13+
/// `.init()` can be called immediately. When `metrics` is disabled, you must
14+
/// call `.with_registry()` before `.init()`.
15+
///
16+
/// See the module-level docs for more information: [`crate::alerts`]
17+
pub fn panic_hook() -> PanicHookBuilder<DefaultBuilderState> {
18+
PanicHookBuilder {
19+
state: Default::default(),
20+
}
21+
}
22+
23+
/// Builder for configuring the panic hook.
24+
///
25+
/// This builder uses the typestate pattern to ensure at compile time that a
26+
/// registry is available before [`PanicHookBuilder::init()`] can be called.
27+
/// When the `metrics` feature is enabled, `foundations::metrics` is used.
///
/// `State` is either `NeedsRegistry` (no registry supplied yet) or
/// `HasRegistry<R>`; `init()` is only implemented for the latter.
28+
#[must_use = "A PanicHookBuilder should be installed with .init()"]
29+
pub struct PanicHookBuilder<State> {
30+
// Typestate marker; holds the registry once one is available.
pub(super) state: State,
31+
}
32+
33+
impl PanicHookBuilder<NeedsRegistry> {
34+
/// Provide a custom metrics registry for recording fatal error metrics.
35+
///
36+
/// This is required when the `metrics` feature is disabled.
37+
pub fn with_registry<R>(self, registry: R) -> PanicHookBuilder<HasRegistry<R>>
38+
where
39+
R: FatalErrorRegistry + 'static,
40+
{
41+
PanicHookBuilder {
42+
state: HasRegistry { registry },
43+
}
44+
}
45+
}
46+
47+
/// With the `metrics` feature enabled, the default registry can be replaced.
#[cfg(feature = "metrics")]
impl PanicHookBuilder<HasRegistry<DefaultRegistry>> {
    /// Supplies the metrics registry that fatal error metrics are recorded to.
    ///
    /// The given registry is used instead of the default foundations metrics
    /// registry.
    pub fn with_registry<R>(self, registry: R) -> PanicHookBuilder<HasRegistry<R>>
    where
        R: FatalErrorRegistry + 'static,
    {
        let state = HasRegistry { registry };
        PanicHookBuilder { state }
    }
}
62+
63+
impl<R: FatalErrorRegistry + 'static> PanicHookBuilder<HasRegistry<R>> {
64+
/// Install the panic hook.
65+
///
66+
/// Returns `true` if this is the first installation, `false` if the hook
67+
/// was already installed (subsequent calls are no-ops).
68+
pub fn init(self) -> bool {
69+
let first_install = HOOK_INSTALLED.set(()).is_ok();
70+
if !first_install {
71+
return false;
72+
}
73+
74+
let registry = self.state.registry;
75+
let previous = panic::take_hook();
76+
77+
panic::set_hook(Box::new(move |panic_info| {
78+
registry.inc_panics_total(1);
79+
80+
log_panic(panic_info);
81+
previous(panic_info);
82+
}));
83+
84+
true
85+
}
86+
}
87+
88+
/// Log the panic using foundations telemetry if initialized, otherwise print JSON to stderr.
///
/// The stderr fallback writes one JSON object with the fields `level`, `msg`,
/// `payload` and `location`. Both `payload` and `location` are escaped so the
/// line stays valid JSON even when the panic message spans several lines or
/// the source path contains backslashes.
fn log_panic(panic_info: &PanicHookInfo<'_>) {
    let location = panic_info.location();
    let payload = panic_payload_as_str(panic_info);

    // Use foundations logging if telemetry is initialized
    #[cfg(feature = "logging")]
    if crate::telemetry::is_initialized() {
        crate::telemetry::log::error!(
            "panic occurred";
            "payload" => payload,
            "location" => ?location,
        );
        return;
    }

    // Fallback to printing structured JSON to stderr
    let location_str = location
        .map(|l| format!("{}:{}:{}", l.file(), l.line(), l.column()))
        .unwrap_or_else(|| "<unknown>".to_string());

    eprintln!(
        r#"{{"level":"error","msg":"panic occurred","payload":"{}","location":"{}"}}"#,
        escape_json_str(payload),
        escape_json_str(&location_str)
    );
}

/// Escapes `raw` so it can be embedded between double quotes in a JSON document.
///
/// Quotes and backslashes are backslash-escaped, and every control character
/// below U+0020 is written as `\n`, `\r`, `\t` or a `\u00XX` sequence.
fn escape_json_str(raw: &str) -> String {
    let mut escaped = String::with_capacity(raw.len());

    for ch in raw.chars() {
        match ch {
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            c if u32::from(c) < 0x20 => {
                escaped.push_str(&format!("\\u{:04x}", u32::from(c)));
            }
            c => escaped.push(c),
        }
    }

    escaped
}

/// Returns the panic payload as a string slice.
///
/// Handles `&str` and `String` payloads; any other payload type yields the
/// placeholder `"<non-string panic payload>"`.
fn panic_payload_as_str<'a>(panic_info: &'a PanicHookInfo<'_>) -> &'a str {
    let payload = panic_info.payload();

    if let Some(s) = payload.downcast_ref::<&str>() {
        s
    } else if let Some(s) = payload.downcast_ref::<String>() {
        s.as_str()
    } else {
        "<non-string panic payload>"
    }
}

foundations/src/alerts/sentry.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
//! Sentry hook implementation for tracking sentry events.
2+
3+
use super::FatalErrorRegistry;
4+
use super::_private::{DefaultBuilderState, HasRegistry, NeedsRegistry};
5+
6+
#[cfg(feature = "metrics")]
7+
use crate::alerts::_private::DefaultRegistry;
8+
9+
/// Returns a builder for configuring and installing the sentry hook. The sentry
10+
/// hook is installed by modifying a provided [`sentry_core::ClientOptions`].
11+
///
12+
/// When the `metrics` feature is enabled, the `foundations::metrics` registry
13+
/// is used `.install()` can be called immediately. When `metrics` is disabled,
14+
/// you must call `.with_registry()` before `.install()`.
15+
///
16+
/// See the module-level docs for more information: [`crate::alerts`].
17+
pub fn sentry_hook() -> SentryHookBuilder<DefaultBuilderState> {
18+
SentryHookBuilder {
19+
state: Default::default(),
20+
}
21+
}
22+
23+
/// Builder for configuring the sentry hook.
///
/// This builder uses the typestate pattern to ensure at compile time that a
/// registry is available before `.install()` can be called. When the `metrics`
/// feature is enabled, a default registry is provided automatically.
///
/// Like `PanicHookBuilder`, the builder is `#[must_use]`: dropping it without
/// calling `.install()` leaves the client options untouched.
#[must_use = "A SentryHookBuilder should be installed with .install()"]
pub struct SentryHookBuilder<State> {
    // Typestate marker; holds the registry once one is available.
    state: State,
}
31+
32+
impl SentryHookBuilder<NeedsRegistry> {
33+
/// Provide a custom metrics registry for recording fatal error metrics.
34+
///
35+
/// This is required when the `metrics` feature is disabled.
36+
pub fn with_registry<R>(self, registry: R) -> SentryHookBuilder<HasRegistry<R>>
37+
where
38+
R: FatalErrorRegistry + Send + Sync + 'static,
39+
{
40+
SentryHookBuilder {
41+
state: HasRegistry { registry },
42+
}
43+
}
44+
}
45+
46+
#[cfg(feature = "metrics")]
impl SentryHookBuilder<HasRegistry<DefaultRegistry>> {
    /// Supplies the metrics registry that fatal error metrics are recorded to.
    ///
    /// The given registry is used instead of the default `foundations::metrics`
    /// registry.
    pub fn with_registry<R>(self, registry: R) -> SentryHookBuilder<HasRegistry<R>>
    where
        R: FatalErrorRegistry + Send + Sync + 'static,
    {
        let state = HasRegistry { registry };
        SentryHookBuilder { state }
    }
}
60+
61+
impl<R: FatalErrorRegistry + Send + Sync + 'static> SentryHookBuilder<HasRegistry<R>> {
62+
/// Install the sentry hook on the provided client options.
63+
///
64+
/// This installs a `before_send` hook that increments `sentry_events_total`.
65+
/// If a previous `before_send` hook exists, it will be called after incrementing
66+
/// the metric.
67+
pub fn install(self, options: &mut sentry_core::ClientOptions) {
68+
use std::sync::Arc;
69+
70+
let registry = Arc::new(self.state.registry);
71+
let previous = options.before_send.take();
72+
73+
options.before_send = Some(Arc::new(move |event| {
74+
registry.inc_sentry_events_total(1);
75+
76+
// Call previous hook if any
77+
if let Some(ref prev) = previous {
78+
prev(event)
79+
} else {
80+
Some(event)
81+
}
82+
}));
83+
}
84+
}

0 commit comments

Comments
 (0)