Skip to content

Commit d19e279

Browse files
committed
feat: panic and sentry hook for foundations
1 parent 763d19f commit d19e279

File tree

13 files changed

+871
-2
lines changed

13 files changed

+871
-2
lines changed

.github/workflows/ci.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,3 +322,9 @@ jobs:
322322
- name: Run foundations tests
323323
run: _RJEM_MALLOC_CONF=prof:true cargo nextest run --target ${{ matrix.target }}
324324
shell: bash
325+
- name: Run panic_hook tests with no default features
326+
run: _RJEM_MALLOC_CONF=prof:true cargo nextest run -p foundations --test panic_hook --no-default-features --target ${{ matrix.target }}
327+
shell: bash
328+
- name: Run sentry_hook tests with no default features
329+
run: _RJEM_MALLOC_CONF=prof:true cargo nextest run -p foundations --test sentry_hook --no-default-features --features sentry --target ${{ matrix.target }}
330+
shell: bash

Cargo.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ reqwest = { version = "0.12", default-features = false }
7474
socket2 = { version = "0.5", features = ["all"] }
7575
syn = "2"
7676
serde = "1"
77+
serde_json = "1"
7778
serde_path_to_error = "0.1.17"
7879
serde_yaml = "0.8.26"
7980
serde_with = "3.3"
@@ -91,6 +92,14 @@ tower-service = "0.3"
9192
tracing-slog = "0.3.0"
9293
tracing-subscriber = "0.3"
9394
yaml-merge-keys = { version = "0.5", features = ["serde_yaml"] }
95+
sentry-core = { version = "0.36", default-features = false }
96+
sentry = { version = "0.36", default-features = false, features = [
97+
"backtrace",
98+
"contexts",
99+
"panic",
100+
"ureq",
101+
"rustls",
102+
] }
94103

95104
# needed for minver
96105
async-stream = "0.3"

foundations/Cargo.toml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,12 @@ platform-common-default = [
4141
"testing",
4242
"settings_deny_unknown_fields_by_default",
4343
"panic_on_too_much_logger_nesting",
44+
"sentry",
4445
]
4546

47+
# Sentry integration for fatal error tracking
48+
sentry = ["dep:sentry-core"]
49+
4650
# A subset of features that can be used both on server and client sides. Useful for libraries
4751
# that can be used either way.
4852
server-client-common-default = ["settings", "client-telemetry", "testing"]
@@ -209,6 +213,7 @@ prometheus-client = { workspace = true, optional = true }
209213
prometools = { workspace = true, optional = true, features = ["serde"] }
210214
rand = { workspace = true, optional = true }
211215
serde = { workspace = true, optional = true, features = ["derive", "rc"] }
216+
serde_json = { workspace = true }
212217
serde_path_to_error = { workspace = true, optional = true }
213218
serde_yaml = { workspace = true, optional = true }
214219
serde_with = { workspace = true, optional = true }
@@ -231,6 +236,7 @@ tikv-jemallocator = { workspace = true, optional = true, features = [
231236
yaml-merge-keys = { workspace = true, optional = true, features = [
232237
"serde_yaml",
233238
] }
239+
sentry-core = { workspace = true, optional = true }
234240

235241
# needed for minver purposes
236242
async-stream = { workspace = true, optional = true }
@@ -261,6 +267,7 @@ tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
261267
ipnetwork = { workspace = true }
262268
nix = { workspace = true , features = ["fs"] }
263269
tracing-subscriber = { workspace = true }
270+
sentry = { workspace = true }
264271

265272
[build-dependencies]
266273
bindgen = { workspace = true, features = ["runtime"], optional = true }

foundations/src/alerts/metrics.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
//! Panic and sentry event related metrics.
2+
3+
use crate::telemetry::metrics::Counter;
4+
5+
/// Panic metrics.
// NOTE(review): the `alerts` module docs refer to this counter as
// `panics_total`; presumably that name is derived from the module name plus
// the function name below — confirm against the `metrics` macro.
6+
#[crate::telemetry::metrics::metrics(crate_path = "crate", unprefixed)]
7+
pub mod panics {
8+
/// Total number of panics observed.
9+
pub fn total() -> Counter;
10+
}
11+
12+
/// Sentry metrics.
// NOTE(review): the `alerts` module docs refer to this counter as
// `sentry_events_total`; presumably that name is derived from the module name
// plus the function name below — confirm against the `metrics` macro.
13+
#[crate::telemetry::metrics::metrics(crate_path = "crate", unprefixed)]
14+
pub mod sentry_events {
15+
/// Total number of sentry events observed.
16+
pub fn total() -> Counter;
17+
}

foundations/src/alerts/mod.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#![allow(clippy::needless_doctest_main)]
2+
//! Fatal error tracking for panics and sentry events.
3+
//!
4+
//! This module provides unified tracking of "fatal errors" which are events that
5+
//! warrant human investigation.
6+
//!
7+
//! It includes:
8+
//! - A panic hook that increments the `panics_total` metric and logs the panic
9+
//! - A sentry hook that increments `sentry_events_total` metric (_requires the `sentry` feature_)
10+
//!
11+
//! If a previous panic or sentry hook exists, it will be executed after the
12+
//! installed foundations hook.
13+
//!
14+
//! This does not require the `metrics` feature to be enabled. If foundations
15+
//! users do not enable it, then a [`FatalErrorRegistry`] must be provided.
16+
//!
17+
//! # Usage
18+
//!
19+
//! Users of [`crate::telemetry::init()`] have the panic hook automatically
20+
//! installed. However, the sentry hook still needs to be installed.
21+
//!
22+
//! To manually install the hooks with the `metrics` feature enabled:
23+
//!
24+
//! ```rust
25+
//! fn main() {
26+
//! foundations::alerts::panic_hook().init();
27+
//!
28+
//! let mut client_opts = sentry_core::ClientOptions::default();
29+
//! foundations::alerts::sentry_hook().install(&mut client_opts);
30+
//! // sentry::init(client_opts);
31+
//! }
32+
//! ```
33+
//!
34+
//! Without the `metrics` feature, you must provide a custom registry:
35+
//!
36+
//! ```rust,ignore
37+
//! struct MyRegistry;
38+
//!
39+
//! fn main() {
40+
//! let registry = MyRegistry;
41+
//!
42+
//! foundations::alerts::panic_hook()
43+
//! .with_registry(registry)
44+
//! .init();
45+
//!
46+
//! let mut client_opts = sentry_core::ClientOptions::default();
47+
//! foundations::alerts::sentry_hook()
48+
//! .with_registry(MyRegistry)
49+
//! .install(&mut client_opts);
50+
//! // sentry::init(client_opts);
51+
//! }
52+
//! ```
53+
54+
#[cfg(feature = "metrics")]
55+
pub mod metrics;
56+
mod panic;
57+
#[cfg(feature = "sentry")]
58+
mod sentry;
59+
60+
pub use self::panic::{panic_hook, PanicHookBuilder};
61+
62+
#[cfg(feature = "sentry")]
63+
pub use self::sentry::{sentry_hook, SentryHookBuilder};
64+
65+
/// Trait for recording sentry and panic hook metrics.
66+
///
67+
/// Implement this trait to use a custom metrics registry instead of
68+
/// `foundations::metrics`.
///
/// Implementors must be `Send + Sync`. The panic hook installed by
/// `PanicHookBuilder::init()` calls [`FatalErrorRegistry::inc_panics_total`]
/// with `1` each time it observes a panic.
69+
pub trait FatalErrorRegistry: Send + Sync {
70+
/// Increment the panics counter.
///
/// `by` is the amount to add to the counter.
71+
fn inc_panics_total(&self, by: u64);
72+
73+
/// Increment the sentry events counter.
///
/// `by` is the amount to add to the counter.
74+
fn inc_sentry_events_total(&self, by: u64);
75+
}
76+
77+
// Typestate plumbing for the hook builders. Hidden from the rendered docs but
// `pub` so the builder signatures can name these types.
#[doc(hidden)]
78+
pub mod _private {
79+
/// The default registry implementation using foundations metrics.
80+
#[cfg(feature = "metrics")]
81+
pub struct DefaultRegistry {
82+
// Crate-private unit field: code outside this crate cannot build the
// struct with a literal.
pub(crate) _private: (),
83+
}
84+
85+
#[cfg(feature = "metrics")]
86+
impl super::FatalErrorRegistry for DefaultRegistry {
87+
/// Adds `by` to the `panics::total()` counter.
fn inc_panics_total(&self, by: u64) {
88+
super::metrics::panics::total().inc_by(by);
89+
}
90+
91+
/// Adds `by` to the `sentry_events::total()` counter.
fn inc_sentry_events_total(&self, by: u64) {
92+
super::metrics::sentry_events::total().inc_by(by);
93+
}
94+
}
95+
96+
/// Builder state in which no registry has been supplied yet.
#[derive(Default)]
97+
pub struct NeedsRegistry {
98+
pub(crate) _private: (),
99+
}
100+
101+
/// Builder state that carries the registry the hook will report to.
pub struct HasRegistry<R> {
102+
pub(crate) registry: R,
103+
}
104+
105+
// With `metrics` enabled the builder can start out already holding the
// default registry.
#[cfg(feature = "metrics")]
106+
impl Default for HasRegistry<DefaultRegistry> {
107+
fn default() -> Self {
108+
Self {
109+
registry: DefaultRegistry { _private: () },
110+
}
111+
}
112+
}
113+
114+
/// Initial builder state: a registry is already present when `metrics` is enabled.
#[cfg(feature = "metrics")]
115+
pub type DefaultBuilderState = HasRegistry<DefaultRegistry>;
116+
117+
/// Initial builder state: a registry must still be provided when `metrics` is disabled.
#[cfg(not(feature = "metrics"))]
118+
pub type DefaultBuilderState = NeedsRegistry;
119+
}

foundations/src/alerts/panic.rs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
//! Panic hook implementation for tracking panics.
2+
3+
#[cfg(feature = "metrics")]
4+
use crate::alerts::_private::DefaultRegistry;
5+
6+
use std::panic::{self, PanicHookInfo};
7+
use std::sync::OnceLock;
8+
9+
use super::FatalErrorRegistry;
10+
use super::_private::{DefaultBuilderState, HasRegistry, NeedsRegistry};
11+
12+
pub(crate) static HOOK_INSTALLED: OnceLock<()> = OnceLock::new();
13+
14+
/// Returns a builder for configuring and installing the panic hook.
15+
///
16+
/// When the `metrics` feature is enabled, a default registry is provided and
17+
/// [`PanicHookBuilder::init()`] can be called immediately. When `metrics` is
18+
/// disabled, you must call [`PanicHookBuilder::with_registry()`] before `.init()`.
19+
///
20+
/// See the module-level docs for more information: [`crate::alerts`]
21+
pub fn panic_hook() -> PanicHookBuilder<DefaultBuilderState> {
22+
PanicHookBuilder {
23+
state: Default::default(),
24+
}
25+
}
26+
27+
/// Builder for configuring the panic hook.
28+
///
29+
/// This builder uses the typestate pattern to ensure at compile time that a
30+
/// registry is available before [`PanicHookBuilder::init()`] can be called.
31+
/// When the `metrics` feature is enabled, `foundations::metrics` is used.
32+
#[must_use = "A PanicHookBuilder should be installed with .init()"]
33+
pub struct PanicHookBuilder<State> {
34+
// Typestate marker: `NeedsRegistry` until a registry is supplied,
// `HasRegistry<R>` once one is available (only that state exposes `init()`).
pub(super) state: State,
35+
}
36+
37+
impl PanicHookBuilder<NeedsRegistry> {
38+
/// Provide a custom metrics registry for recording fatal error metrics.
39+
///
40+
/// This is required when the `metrics` feature is disabled.
41+
pub fn with_registry<R>(self, registry: R) -> PanicHookBuilder<HasRegistry<R>>
42+
where
43+
R: FatalErrorRegistry + 'static,
44+
{
45+
PanicHookBuilder {
46+
state: HasRegistry { registry },
47+
}
48+
}
49+
}
50+
51+
/// With the `metrics` feature enabled, the default registry can still be replaced.
#[cfg(feature = "metrics")]
impl PanicHookBuilder<HasRegistry<DefaultRegistry>> {
    /// Supplies the metrics registry that fatal error metrics are recorded to.
    ///
    /// The given registry takes the place of the default foundations metrics
    /// registry.
    pub fn with_registry<R>(self, registry: R) -> PanicHookBuilder<HasRegistry<R>>
    where
        R: FatalErrorRegistry + 'static,
    {
        let state = HasRegistry { registry };

        PanicHookBuilder { state }
    }
}
66+
67+
impl<R: FatalErrorRegistry + 'static> PanicHookBuilder<HasRegistry<R>> {
68+
/// Install the panic hook.
69+
///
70+
/// Returns `true` if this is the first installation, `false` if the hook
71+
/// was already installed (subsequent calls are no-ops).
72+
pub fn init(self) -> bool {
73+
let first_install = HOOK_INSTALLED.set(()).is_ok();
74+
if !first_install {
75+
return false;
76+
}
77+
78+
let registry = self.state.registry;
79+
let previous = panic::take_hook();
80+
81+
panic::set_hook(Box::new(move |panic_info| {
82+
registry.inc_panics_total(1);
83+
84+
log_panic(panic_info);
85+
previous(panic_info);
86+
}));
87+
88+
true
89+
}
90+
}
91+
92+
/// Log the panic using foundations telemetry if initialized, otherwise print JSON to stderr.
93+
fn log_panic(panic_info: &PanicHookInfo<'_>) {
94+
let location = panic_info.location();
95+
let payload = panic_payload_as_str(panic_info);
96+
97+
// Use foundations logging if telemetry is initialized
98+
#[cfg(feature = "logging")]
99+
if crate::telemetry::is_initialized() {
100+
crate::telemetry::log::error!(
101+
"panic occurred";
102+
"payload" => payload,
103+
"location" => ?location,
104+
);
105+
return;
106+
}
107+
108+
// Fallback to printing structured JSON to stderr
109+
let location_str = location
110+
.map(|l| format!("{}:{}:{}", l.file(), l.line(), l.column()))
111+
.unwrap_or_else(|| "<unknown>".to_string());
112+
113+
let json_output = serde_json::json!({
114+
"level": "error",
115+
"msg": "panic occurred",
116+
"payload": payload,
117+
"location": location_str
118+
});
119+
eprintln!("{}", json_output);
120+
}
121+
122+
/// Extracts a textual message from the panic payload.
///
/// Payloads of type `&str` and `String` are returned as-is; any other payload
/// type yields the fixed placeholder `"<non-string panic payload>"`.
fn panic_payload_as_str<'a>(panic_info: &'a PanicHookInfo<'_>) -> &'a str {
    let raw = panic_info.payload();

    match (raw.downcast_ref::<&str>(), raw.downcast_ref::<String>()) {
        (Some(text), _) => *text,
        (None, Some(owned)) => owned.as_str(),
        (None, None) => "<non-string panic payload>",
    }
}

0 commit comments

Comments
 (0)