diff --git a/crates/tap-agent/src/agent.rs b/crates/tap-agent/src/agent.rs index b7b670cab..d878b17d3 100644 --- a/crates/tap-agent/src/agent.rs +++ b/crates/tap-agent/src/agent.rs @@ -62,6 +62,17 @@ pub mod sender_allocation; /// Unaggregated receipts containing total value and last id stored in the table pub mod unaggregated_receipts; +/// Force initialization of all Prometheus metrics in the agent module. +/// +/// This should be called at startup before the metrics server is started +/// to ensure all metrics are registered with Prometheus, even if no sender +/// activity has occurred yet. +pub fn init_metrics() { + sender_account::init_metrics(); + sender_allocation::init_metrics(); + sender_accounts_manager::init_metrics(); +} + /// This is the main entrypoint for starting up tap-agent /// /// It uses the static [crate::CONFIG] to configure the agent. @@ -268,3 +279,111 @@ pub async fn start_agent( Ok(SenderAccountsManager::spawn(None, SenderAccountsManager, args).await?) } + +#[cfg(test)] +mod tests { + use prometheus::core::Collector; + + use super::*; + + #[test] + fn test_init_metrics_registers_all_metrics() { + // Call init_metrics to register all metrics + init_metrics(); + + // Verify that calling init_metrics doesn't panic (metrics are already registered) + // This is the main test - that we can safely call init_metrics at startup. + // The LazyLock pattern ensures metrics are only registered once. + init_metrics(); + + // Access the metric statics directly, which forces each LazyLock initializer to run + // (the initializer registers the metric and unwraps the result, so a failure panics). + // A non-empty desc() then confirms each collector was built with its descriptors.
+ + // Check sender_account metrics + assert!( + !sender_account::SENDER_DENIED.desc().is_empty(), + "tap_sender_denied should be registered" + ); + assert!( + !sender_account::ESCROW_BALANCE.desc().is_empty(), + "tap_sender_escrow_balance_grt_total should be registered" + ); + assert!( + !sender_account::UNAGGREGATED_FEES.desc().is_empty(), + "tap_unaggregated_fees_grt_total should be registered" + ); + assert!( + !sender_account::UNAGGREGATED_FEES_BY_VERSION + .desc() + .is_empty(), + "tap_unaggregated_fees_grt_total_by_version should be registered" + ); + assert!( + !sender_account::SENDER_FEE_TRACKER.desc().is_empty(), + "tap_sender_fee_tracker_grt_total should be registered" + ); + assert!( + !sender_account::INVALID_RECEIPT_FEES.desc().is_empty(), + "tap_invalid_receipt_fees_grt_total should be registered" + ); + assert!( + !sender_account::PENDING_RAV.desc().is_empty(), + "tap_pending_rav_grt_total should be registered" + ); + assert!( + !sender_account::MAX_FEE_PER_SENDER.desc().is_empty(), + "tap_max_fee_per_sender_grt_total should be registered" + ); + assert!( + !sender_account::RAV_REQUEST_TRIGGER_VALUE.desc().is_empty(), + "tap_rav_request_trigger_value should be registered" + ); + assert!( + !sender_account::ALLOCATION_RECONCILIATION_RUNS + .desc() + .is_empty(), + "tap_allocation_reconciliation_runs_total should be registered" + ); + + // Check sender_allocation metrics + assert!( + !sender_allocation::CLOSED_SENDER_ALLOCATIONS + .desc() + .is_empty(), + "tap_closed_sender_allocation_total should be registered" + ); + assert!( + !sender_allocation::RAVS_CREATED.desc().is_empty(), + "tap_ravs_created_total should be registered" + ); + assert!( + !sender_allocation::RAVS_CREATED_BY_VERSION.desc().is_empty(), + "tap_ravs_created_total_by_version should be registered" + ); + assert!( + !sender_allocation::RAVS_FAILED.desc().is_empty(), + "tap_ravs_failed_total should be registered" + ); + assert!( + 
!sender_allocation::RAVS_FAILED_BY_VERSION.desc().is_empty(), + "tap_ravs_failed_total_by_version should be registered" + ); + assert!( + !sender_allocation::RAV_RESPONSE_TIME.desc().is_empty(), + "tap_rav_response_time_seconds should be registered" + ); + assert!( + !sender_allocation::RAV_RESPONSE_TIME_BY_VERSION + .desc() + .is_empty(), + "tap_rav_response_time_seconds_by_version should be registered" + ); + + // Check sender_accounts_manager metrics + assert!( + !sender_accounts_manager::RECEIPTS_CREATED.desc().is_empty(), + "tap_receipts_received_total should be registered" + ); + } +} diff --git a/crates/tap-agent/src/agent/sender_account.rs b/crates/tap-agent/src/agent/sender_account.rs index 6ae8e545a..13fa3857f 100644 --- a/crates/tap-agent/src/agent/sender_account.rs +++ b/crates/tap-agent/src/agent/sender_account.rs @@ -54,10 +54,10 @@ use crate::{ tracker::{SenderFeeTracker, SimpleFeeTracker}, }; -static SENDER_DENIED: LazyLock = LazyLock::new(|| { +pub(crate) static SENDER_DENIED: LazyLock = LazyLock::new(|| { register_int_gauge_vec!("tap_sender_denied", "Sender is denied", &["sender"]).unwrap() }); -static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { +pub(crate) static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_sender_escrow_balance_grt_total", "Sender escrow balance", @@ -65,7 +65,7 @@ static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { +pub(crate) static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_unaggregated_fees_grt_total", "Unggregated Fees value", @@ -73,7 +73,7 @@ static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_unaggregated_fees_grt_total_by_version", "Unaggregated fees per sender, allocation and TAP version", @@ 
-81,7 +81,7 @@ static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { +pub(crate) static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_sender_fee_tracker_grt_total", "Sender fee tracker metric", @@ -89,7 +89,7 @@ static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { +pub(crate) static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_invalid_receipt_fees_grt_total", "Failed receipt fees", @@ -97,7 +97,7 @@ static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static PENDING_RAV: LazyLock = LazyLock::new(|| { +pub(crate) static PENDING_RAV: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_pending_rav_grt_total", "Pending ravs values", @@ -105,7 +105,7 @@ static PENDING_RAV: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { +pub(crate) static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_max_fee_per_sender_grt_total", "Max fee per sender in the config", @@ -113,7 +113,7 @@ static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_rav_request_trigger_value", "RAV request trigger value divisor", @@ -121,7 +121,7 @@ static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static ALLOCATION_RECONCILIATION_RUNS: LazyLock = LazyLock::new(|| { +pub(crate) static ALLOCATION_RECONCILIATION_RUNS: LazyLock = LazyLock::new(|| { register_int_counter_vec!( "tap_allocation_reconciliation_runs_total", "Number of allocation reconciliation runs", @@ -1690,6 +1690,18 @@ impl Actor for SenderAccount { let _ = UNAGGREGATED_FEES 
.remove_label_values(&[&state.sender.to_string(), &allocation_id.to_string()]); + let version = match state.sender_type { + crate::agent::sender_accounts_manager::SenderType::Legacy => TAP_V1, + crate::agent::sender_accounts_manager::SenderType::Horizon => TAP_V2, + }; + let _ = UNAGGREGATED_FEES_BY_VERSION.remove_label_values(&[ + &state.sender.to_string(), + &allocation_id.to_string(), + version, + ]); + let _ = INVALID_RECEIPT_FEES + .remove_label_values(&[&state.sender.to_string(), &allocation_id.to_string()]); + // Check for deny conditions - look up correct allocation variant from state let allocation_enum = state .allocation_ids @@ -1770,6 +1782,15 @@ impl Actor for SenderAccount { if let Some(handle) = state.reconciliation_handle.take() { handle.abort(); } + + // Clean up sender-level metrics to avoid stale gauge values + let sender_label = state.sender.to_string(); + let _ = SENDER_DENIED.remove_label_values(&[&sender_label]); + let _ = ESCROW_BALANCE.remove_label_values(&[&sender_label]); + let _ = SENDER_FEE_TRACKER.remove_label_values(&[&sender_label]); + let _ = MAX_FEE_PER_SENDER.remove_label_values(&[&sender_label]); + let _ = RAV_REQUEST_TRIGGER_VALUE.remove_label_values(&[&sender_label]); + Ok(()) } } @@ -1810,6 +1831,24 @@ impl SenderAccount { } } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no SenderAccount actors have been created yet. 
+pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*SENDER_DENIED; + let _ = &*ESCROW_BALANCE; + let _ = &*UNAGGREGATED_FEES; + let _ = &*UNAGGREGATED_FEES_BY_VERSION; + let _ = &*SENDER_FEE_TRACKER; + let _ = &*INVALID_RECEIPT_FEES; + let _ = &*PENDING_RAV; + let _ = &*MAX_FEE_PER_SENDER; + let _ = &*RAV_REQUEST_TRIGGER_VALUE; + let _ = &*ALLOCATION_RECONCILIATION_RUNS; +} + #[cfg(test)] pub mod tests { #![allow(missing_docs)] @@ -1835,7 +1874,11 @@ pub mod tests { Mock, MockServer, ResponseTemplate, }; - use super::{RavInformation, SenderAccountMessage, ALLOCATION_RECONCILIATION_RUNS}; + use super::{ + RavInformation, SenderAccountMessage, ALLOCATION_RECONCILIATION_RUNS, ESCROW_BALANCE, + INVALID_RECEIPT_FEES, MAX_FEE_PER_SENDER, RAV_REQUEST_TRIGGER_VALUE, SENDER_DENIED, + SENDER_FEE_TRACKER, TAP_V1, UNAGGREGATED_FEES_BY_VERSION, + }; use crate::{ agent::{ sender_account::ReceiptFees, sender_accounts_manager::AllocationId, @@ -3100,4 +3143,215 @@ pub mod tests { sender_account.stop_and_wait(None, None).await.unwrap(); } + + /// Test that UNAGGREGATED_FEES_BY_VERSION metric is cleaned up when allocation stops + /// + /// This test verifies the fix for stale gauge metrics that were introduced in the + /// Horizon V2 TAP support commit. Previously, UNAGGREGATED_FEES_BY_VERSION was set + /// but never cleaned up when allocations closed, leaving stale values in Prometheus. 
+ #[tokio::test] + async fn test_unaggregated_fees_by_version_cleanup_on_allocation_stop() { + // NOTE: ALLOCATION_ID_1 is a shared fixture that other tests in this module also use as a + // metric label, so it does not isolate this test (prometheus metrics are global/shared) + let unique_allocation = test_assets::ALLOCATION_ID_1; + + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, prefix, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + // Create a mock sender allocation and link it to the sender account + let (mock_sender_allocation, _, next_unaggregated_fees) = + MockSenderAllocation::new_with_triggered_rav_request(sender_account.clone()); + + let name = format!("{}:{}:{}", prefix, SENDER.1, unique_allocation); + let (allocation, _) = MockSenderAllocation::spawn_linked( + Some(name), + mock_sender_allocation, + (), + sender_account.get_cell(), + ) + .await + .unwrap(); + + // Send unaggregated fees to trigger metric set + next_unaggregated_fees.send(1000).unwrap(); + + // Directly set the metric to simulate the value being recorded + // (We do this because the actual message flow is complex and depends on + // allocation state being properly set up) + let sender_label = SENDER.1.to_string(); + let allocation_label = unique_allocation.to_string(); + UNAGGREGATED_FEES_BY_VERSION + .with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .set(1000.0); + + // Verify metric was set + let metric_value = UNAGGREGATED_FEES_BY_VERSION + .get_metric_with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .expect("Metric should exist after being set") + .get(); + assert_eq!( + metric_value, 1000.0, + "Metric should have value 1000.0 after set, got {metric_value}" + ); + + // Stop the allocation - this should trigger ActorTerminated supervision event + // which in turn should clean up the metric + allocation.stop_and_wait(None, None).await.unwrap(); + + // Give time for supervision event to be processed
flush_messages(&mut msg_receiver).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Verify metric was cleaned up. After remove_label_values, get_metric_with_label_values + // creates a NEW metric with default value 0. So the value changing from 1000 to 0 + // proves the old metric was removed. + // See: https://docs.rs/prometheus/latest/prometheus/core/struct.MetricVec.html + let metric_value_after = UNAGGREGATED_FEES_BY_VERSION + .with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .get(); + assert_eq!( + metric_value_after, 0.0, + "Metric should be 0 after removal (old value was 1000), got {metric_value_after}" + ); + + sender_account.stop_and_wait(None, None).await.unwrap(); + } + + /// Test that INVALID_RECEIPT_FEES metric is cleaned up when allocation stops + #[tokio::test] + async fn test_invalid_receipt_fees_cleanup_on_allocation_stop() { + let unique_allocation = test_assets::ALLOCATION_ID_1; + + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, prefix, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + let (mock_sender_allocation, _, next_unaggregated_fees) = + MockSenderAllocation::new_with_triggered_rav_request(sender_account.clone()); + + let name = format!("{}:{}:{}", prefix, SENDER.1, unique_allocation); + let (allocation, _) = MockSenderAllocation::spawn_linked( + Some(name), + mock_sender_allocation, + (), + sender_account.get_cell(), + ) + .await + .unwrap(); + + next_unaggregated_fees.send(1000).unwrap(); + + let sender_label = SENDER.1.to_string(); + let allocation_label = unique_allocation.to_string(); + INVALID_RECEIPT_FEES + .with_label_values(&[&sender_label, &allocation_label]) + .set(500.0); + + let metric_value = INVALID_RECEIPT_FEES + .get_metric_with_label_values(&[&sender_label, &allocation_label]) + .expect("Metric should exist after being set") + .get(); + assert_eq!( + metric_value, 500.0, + "Metric should 
have value 500.0 after set, got {metric_value}" + ); + + allocation.stop_and_wait(None, None).await.unwrap(); + + flush_messages(&mut msg_receiver).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + let metric_value_after = INVALID_RECEIPT_FEES + .with_label_values(&[&sender_label, &allocation_label]) + .get(); + assert_eq!( + metric_value_after, 0.0, + "Metric should be 0 after removal (old value was 500), got {metric_value_after}" + ); + + sender_account.stop_and_wait(None, None).await.unwrap(); + } + + /// Test that sender-level metrics are cleaned up when SenderAccount stops + #[tokio::test] + async fn test_sender_level_gauges_cleanup_on_post_stop() { + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, _, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + flush_messages(&mut msg_receiver).await; + + let sender_label = SENDER.1.to_string(); + + // Set all sender-level metrics to non-zero values + SENDER_DENIED.with_label_values(&[&sender_label]).set(1); + ESCROW_BALANCE + .with_label_values(&[&sender_label]) + .set(1000.0); + SENDER_FEE_TRACKER + .with_label_values(&[&sender_label]) + .set(500.0); + MAX_FEE_PER_SENDER + .with_label_values(&[&sender_label]) + .set(2000.0); + RAV_REQUEST_TRIGGER_VALUE + .with_label_values(&[&sender_label]) + .set(100.0); + + // Verify metrics were set + assert_eq!( + SENDER_DENIED + .get_metric_with_label_values(&[&sender_label]) + .unwrap() + .get(), + 1 + ); + assert_eq!( + ESCROW_BALANCE + .get_metric_with_label_values(&[&sender_label]) + .unwrap() + .get(), + 1000.0 + ); + + // Stop sender account - this triggers post_stop which should clean up metrics + sender_account.stop_and_wait(None, None).await.unwrap(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Verify all sender-level metrics were cleaned up + assert_eq!( + SENDER_DENIED.with_label_values(&[&sender_label]).get(), + 0, + "SENDER_DENIED 
should be 0 after cleanup" + ); + assert_eq!( + ESCROW_BALANCE.with_label_values(&[&sender_label]).get(), + 0.0, + "ESCROW_BALANCE should be 0 after cleanup" + ); + assert_eq!( + SENDER_FEE_TRACKER.with_label_values(&[&sender_label]).get(), + 0.0, + "SENDER_FEE_TRACKER should be 0 after cleanup" + ); + assert_eq!( + MAX_FEE_PER_SENDER.with_label_values(&[&sender_label]).get(), + 0.0, + "MAX_FEE_PER_SENDER should be 0 after cleanup" + ); + assert_eq!( + RAV_REQUEST_TRIGGER_VALUE + .with_label_values(&[&sender_label]) + .get(), + 0.0, + "RAV_REQUEST_TRIGGER_VALUE should be 0 after cleanup" + ); + } } diff --git a/crates/tap-agent/src/agent/sender_accounts_manager.rs b/crates/tap-agent/src/agent/sender_accounts_manager.rs index 8737649f3..80fce5dc2 100644 --- a/crates/tap-agent/src/agent/sender_accounts_manager.rs +++ b/crates/tap-agent/src/agent/sender_accounts_manager.rs @@ -30,7 +30,7 @@ use super::sender_account::{ }; use crate::agent::sender_allocation::SenderAllocationMessage; -static RECEIPTS_CREATED: LazyLock = LazyLock::new(|| { +pub(crate) static RECEIPTS_CREATED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_receipts_received_total", "Receipts received since start of the program.", @@ -1349,6 +1349,15 @@ async fn handle_notification( Ok(()) } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no receipts have been processed yet. 
+pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*RECEIPTS_CREATED; +} + #[cfg(test)] mod tests { use std::collections::{HashMap, HashSet}; diff --git a/crates/tap-agent/src/agent/sender_allocation.rs b/crates/tap-agent/src/agent/sender_allocation.rs index 2945708ed..e5ecb5b0a 100644 --- a/crates/tap-agent/src/agent/sender_allocation.rs +++ b/crates/tap-agent/src/agent/sender_allocation.rs @@ -46,7 +46,7 @@ use crate::{ }, }; -static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { +pub(crate) static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_closed_sender_allocation_total", "Count of sender-allocation managers closed since the start of the program", @@ -54,7 +54,7 @@ static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_CREATED: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_CREATED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_created_total", "RAVs updated or created per sender allocation since the start of the program", @@ -62,7 +62,7 @@ static RAVS_CREATED: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_created_total_by_version", "RAVs created/updated per sender allocation and TAP version", @@ -70,7 +70,7 @@ static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_FAILED: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_FAILED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_failed_total", "RAV requests failed since the start of the program", @@ -78,7 +78,7 @@ static RAVS_FAILED: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { register_counter_vec!( 
"tap_ravs_failed_total_by_version", "RAV requests failed per sender allocation and TAP version", @@ -86,7 +86,7 @@ static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { register_histogram_vec!( "tap_rav_response_time_seconds", "RAV response time per sender", @@ -94,7 +94,7 @@ static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_RESPONSE_TIME_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_RESPONSE_TIME_BY_VERSION: LazyLock = LazyLock::new(|| { register_histogram_vec!( "tap_rav_response_time_seconds_by_version", "RAV response time per sender and TAP version", @@ -1461,6 +1461,21 @@ impl DatabaseInteractions for SenderAllocationState { } } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no SenderAllocation actors have been created yet. +pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*CLOSED_SENDER_ALLOCATIONS; + let _ = &*RAVS_CREATED; + let _ = &*RAVS_CREATED_BY_VERSION; + let _ = &*RAVS_FAILED; + let _ = &*RAVS_FAILED_BY_VERSION; + let _ = &*RAV_RESPONSE_TIME; + let _ = &*RAV_RESPONSE_TIME_BY_VERSION; +} + #[cfg(test)] pub mod tests { #![allow(missing_docs)] diff --git a/crates/tap-agent/src/main.rs b/crates/tap-agent/src/main.rs index 4cfbe240c..f07f27e90 100644 --- a/crates/tap-agent/src/main.rs +++ b/crates/tap-agent/src/main.rs @@ -27,6 +27,12 @@ async fn main() -> anyhow::Result<()> { // initialize LazyLock'd config _ = &*CONFIG; + // Eagerly initialize all Prometheus metrics before starting the metrics server. + // This ensures metrics are registered even if no senders have escrow accounts + // or pending allocations at startup. 
+ agent::init_metrics(); + tracing::info!("Prometheus metrics initialized"); + let (manager, handler) = agent::start_agent() .await .with_context(|| "Failed to start TAP agent")?;