From 8de9ebc52d6ecf0bc4bfa59cc1a412e6a0a4217f Mon Sep 17 00:00:00 2001 From: Joseph Livesey Date: Sat, 6 Dec 2025 09:23:56 -0500 Subject: [PATCH 1/2] fix(tap-agent): eagerly initialize Prometheus metrics at startup Prometheus metrics in tap-agent are defined using LazyLock, which means they are only registered with the Prometheus registry when first accessed. If no senders have escrow accounts or pending allocations at startup, no SenderAccount actors are spawned, the metrics are never accessed, and /metrics returns empty. This fix creates init_metrics() functions that force all LazyLock metric statics to initialize at startup, ensuring they are registered with Prometheus before any sender activity occurs. Changes: - Make all metric static definitions pub(crate) in sender_account.rs, sender_allocation.rs, and sender_accounts_manager.rs - Add init_metrics() function to each module that dereferences each LazyLock to force initialization - Add public init_metrics() to agent.rs that calls all module-level init_metrics functions - Call agent::init_metrics() in main.rs before spawning the metrics server - Add test to verify all metrics are properly registered --- crates/tap-agent/src/agent.rs | 119 ++++++++++++++++++ crates/tap-agent/src/agent/sender_account.rs | 38 ++++-- .../src/agent/sender_accounts_manager.rs | 11 +- .../tap-agent/src/agent/sender_allocation.rs | 29 +++-- crates/tap-agent/src/main.rs | 6 + 5 files changed, 185 insertions(+), 18 deletions(-) diff --git a/crates/tap-agent/src/agent.rs b/crates/tap-agent/src/agent.rs index b7b670cab..d878b17d3 100644 --- a/crates/tap-agent/src/agent.rs +++ b/crates/tap-agent/src/agent.rs @@ -62,6 +62,17 @@ pub mod sender_allocation; /// Unaggregated receipts containing total value and last id stored in the table pub mod unaggregated_receipts; +/// Force initialization of all Prometheus metrics in the agent module. +/// +/// This should be called at startup before the metrics server is started +/// to ensure all metrics are registered with Prometheus, even if no sender +/// activity has occurred yet. +pub fn init_metrics() { + sender_account::init_metrics(); + sender_allocation::init_metrics(); + sender_accounts_manager::init_metrics(); +} + /// This is the main entrypoint for starting up tap-agent /// /// It uses the static [crate::CONFIG] to configure the agent. @@ -268,3 +279,111 @@ pub async fn start_agent( Ok(SenderAccountsManager::spawn(None, SenderAccountsManager, args).await?) } + +#[cfg(test)] +mod tests { + use prometheus::core::Collector; + + use super::*; + + #[test] + fn test_init_metrics_registers_all_metrics() { + // Call init_metrics to register all metrics + init_metrics(); + + // Verify that calling init_metrics doesn't panic (metrics are already registered) + // This is the main test - that we can safely call init_metrics at startup. + // The LazyLock pattern ensures metrics are only registered once. + init_metrics(); + + // Verify metrics are registered by directly accessing the statics. + // This ensures the LazyLock has been initialized. + // We use desc() to get the metric descriptors which proves they're registered. + + // Check sender_account metrics + assert!( + !sender_account::SENDER_DENIED.desc().is_empty(), + "tap_sender_denied should be registered" + ); + assert!( + !sender_account::ESCROW_BALANCE.desc().is_empty(), + "tap_sender_escrow_balance_grt_total should be registered" + ); + assert!( + !sender_account::UNAGGREGATED_FEES.desc().is_empty(), + "tap_unaggregated_fees_grt_total should be registered" + ); + assert!( + !sender_account::UNAGGREGATED_FEES_BY_VERSION + .desc() + .is_empty(), + "tap_unaggregated_fees_grt_total_by_version should be registered" + ); + assert!( + !sender_account::SENDER_FEE_TRACKER.desc().is_empty(), + "tap_sender_fee_tracker_grt_total should be registered" + ); + assert!( + !sender_account::INVALID_RECEIPT_FEES.desc().is_empty(), + "tap_invalid_receipt_fees_grt_total should be registered" + ); + assert!( + !sender_account::PENDING_RAV.desc().is_empty(), + "tap_pending_rav_grt_total should be registered" + ); + assert!( + !sender_account::MAX_FEE_PER_SENDER.desc().is_empty(), + "tap_max_fee_per_sender_grt_total should be registered" + ); + assert!( + !sender_account::RAV_REQUEST_TRIGGER_VALUE.desc().is_empty(), + "tap_rav_request_trigger_value should be registered" + ); + assert!( + !sender_account::ALLOCATION_RECONCILIATION_RUNS + .desc() + .is_empty(), + "tap_allocation_reconciliation_runs_total should be registered" + ); + + // Check sender_allocation metrics + assert!( + !sender_allocation::CLOSED_SENDER_ALLOCATIONS + .desc() + .is_empty(), + "tap_closed_sender_allocation_total should be registered" + ); + assert!( + !sender_allocation::RAVS_CREATED.desc().is_empty(), + "tap_ravs_created_total should be registered" + ); + assert!( + !sender_allocation::RAVS_CREATED_BY_VERSION.desc().is_empty(), + "tap_ravs_created_total_by_version should be registered" + ); + assert!( + !sender_allocation::RAVS_FAILED.desc().is_empty(), + "tap_ravs_failed_total should be registered" + ); + assert!( + !sender_allocation::RAVS_FAILED_BY_VERSION.desc().is_empty(), + "tap_ravs_failed_total_by_version should be registered" + ); + assert!( + !sender_allocation::RAV_RESPONSE_TIME.desc().is_empty(), + "tap_rav_response_time_seconds should be registered" + ); + assert!( + !sender_allocation::RAV_RESPONSE_TIME_BY_VERSION + .desc() + .is_empty(), + "tap_rav_response_time_seconds_by_version should be registered" + ); + + // Check sender_accounts_manager metrics + assert!( + !sender_accounts_manager::RECEIPTS_CREATED.desc().is_empty(), + "tap_receipts_received_total should be registered" + ); + } +} diff --git a/crates/tap-agent/src/agent/sender_account.rs b/crates/tap-agent/src/agent/sender_account.rs index 6ae8e545a..ca21fa616 100644 --- a/crates/tap-agent/src/agent/sender_account.rs +++ b/crates/tap-agent/src/agent/sender_account.rs @@ -54,10 +54,10 @@ use crate::{ tracker::{SenderFeeTracker, SimpleFeeTracker}, }; -static SENDER_DENIED: LazyLock = LazyLock::new(|| { +pub(crate) static SENDER_DENIED: LazyLock = LazyLock::new(|| { register_int_gauge_vec!("tap_sender_denied", "Sender is denied", &["sender"]).unwrap() }); -static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { +pub(crate) static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_sender_escrow_balance_grt_total", "Sender escrow balance", @@ -65,7 +65,7 @@ static ESCROW_BALANCE: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { +pub(crate) static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_unaggregated_fees_grt_total", "Unggregated Fees value", @@ -73,7 +73,7 @@ static UNAGGREGATED_FEES: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_unaggregated_fees_grt_total_by_version", "Unaggregated fees per sender, allocation and TAP version", @@ -81,7 +81,7 @@ static UNAGGREGATED_FEES_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { +pub(crate) static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_sender_fee_tracker_grt_total", "Sender fee tracker metric", @@ -89,7 +89,7 @@ static SENDER_FEE_TRACKER: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { +pub(crate) static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_invalid_receipt_fees_grt_total", "Failed receipt fees", @@ -97,7 +97,7 @@ static INVALID_RECEIPT_FEES: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static PENDING_RAV: LazyLock = LazyLock::new(|| { +pub(crate) static PENDING_RAV: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_pending_rav_grt_total", "Pending ravs values", @@ -105,7 +105,7 @@ static PENDING_RAV: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { +pub(crate) static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_max_fee_per_sender_grt_total", "Max fee per sender in the config", @@ -113,7 +113,7 @@ static MAX_FEE_PER_SENDER: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { register_gauge_vec!( "tap_rav_request_trigger_value", "RAV request trigger value divisor", @@ -121,7 +121,7 @@ static RAV_REQUEST_TRIGGER_VALUE: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static ALLOCATION_RECONCILIATION_RUNS: LazyLock = LazyLock::new(|| { +pub(crate) static ALLOCATION_RECONCILIATION_RUNS: LazyLock = LazyLock::new(|| { register_int_counter_vec!( "tap_allocation_reconciliation_runs_total", "Number of allocation reconciliation runs", @@ -1810,6 +1810,24 @@ impl SenderAccount { } } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no SenderAccount actors have been created yet. +pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*SENDER_DENIED; + let _ = &*ESCROW_BALANCE; + let _ = &*UNAGGREGATED_FEES; + let _ = &*UNAGGREGATED_FEES_BY_VERSION; + let _ = &*SENDER_FEE_TRACKER; + let _ = &*INVALID_RECEIPT_FEES; + let _ = &*PENDING_RAV; + let _ = &*MAX_FEE_PER_SENDER; + let _ = &*RAV_REQUEST_TRIGGER_VALUE; + let _ = &*ALLOCATION_RECONCILIATION_RUNS; +} + #[cfg(test)] pub mod tests { #![allow(missing_docs)] diff --git a/crates/tap-agent/src/agent/sender_accounts_manager.rs b/crates/tap-agent/src/agent/sender_accounts_manager.rs index 8737649f3..80fce5dc2 100644 --- a/crates/tap-agent/src/agent/sender_accounts_manager.rs +++ b/crates/tap-agent/src/agent/sender_accounts_manager.rs @@ -30,7 +30,7 @@ use super::sender_account::{ }; use crate::agent::sender_allocation::SenderAllocationMessage; -static RECEIPTS_CREATED: LazyLock = LazyLock::new(|| { +pub(crate) static RECEIPTS_CREATED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_receipts_received_total", "Receipts received since start of the program.", @@ -1349,6 +1349,15 @@ async fn handle_notification( Ok(()) } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no receipts have been processed yet. +pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*RECEIPTS_CREATED; +} + #[cfg(test)] mod tests { use std::collections::{HashMap, HashSet}; diff --git a/crates/tap-agent/src/agent/sender_allocation.rs b/crates/tap-agent/src/agent/sender_allocation.rs index 2945708ed..e5ecb5b0a 100644 --- a/crates/tap-agent/src/agent/sender_allocation.rs +++ b/crates/tap-agent/src/agent/sender_allocation.rs @@ -46,7 +46,7 @@ use crate::{ }, }; -static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { +pub(crate) static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_closed_sender_allocation_total", "Count of sender-allocation managers closed since the start of the program", @@ -54,7 +54,7 @@ static CLOSED_SENDER_ALLOCATIONS: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_CREATED: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_CREATED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_created_total", "RAVs updated or created per sender allocation since the start of the program", @@ -62,7 +62,7 @@ static RAVS_CREATED: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_created_total_by_version", "RAVs created/updated per sender allocation and TAP version", @@ -70,7 +70,7 @@ static RAVS_CREATED_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_FAILED: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_FAILED: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_failed_total", "RAV requests failed since the start of the program", @@ -78,7 +78,7 @@ static RAVS_FAILED: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { register_counter_vec!( "tap_ravs_failed_total_by_version", "RAV requests failed per sender allocation and TAP version", @@ -86,7 +86,7 @@ static RAVS_FAILED_BY_VERSION: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { register_histogram_vec!( "tap_rav_response_time_seconds", "RAV response time per sender", @@ -94,7 +94,7 @@ static RAV_RESPONSE_TIME: LazyLock = LazyLock::new(|| { ) .unwrap() }); -static RAV_RESPONSE_TIME_BY_VERSION: LazyLock = LazyLock::new(|| { +pub(crate) static RAV_RESPONSE_TIME_BY_VERSION: LazyLock = LazyLock::new(|| { register_histogram_vec!( "tap_rav_response_time_seconds_by_version", "RAV response time per sender and TAP version", @@ -1461,6 +1461,21 @@ impl DatabaseInteractions for SenderAllocationState { } } +/// Force initialization of all LazyLock metrics in this module. +/// +/// This ensures metrics are registered with Prometheus at startup, +/// even if no SenderAllocation actors have been created yet. +pub fn init_metrics() { + // Dereference each LazyLock to force initialization + let _ = &*CLOSED_SENDER_ALLOCATIONS; + let _ = &*RAVS_CREATED; + let _ = &*RAVS_CREATED_BY_VERSION; + let _ = &*RAVS_FAILED; + let _ = &*RAVS_FAILED_BY_VERSION; + let _ = &*RAV_RESPONSE_TIME; + let _ = &*RAV_RESPONSE_TIME_BY_VERSION; +} + #[cfg(test)] pub mod tests { #![allow(missing_docs)] diff --git a/crates/tap-agent/src/main.rs b/crates/tap-agent/src/main.rs index 4cfbe240c..f07f27e90 100644 --- a/crates/tap-agent/src/main.rs +++ b/crates/tap-agent/src/main.rs @@ -27,6 +27,12 @@ async fn main() -> anyhow::Result<()> { // initialize LazyLock'd config _ = &*CONFIG; + // Eagerly initialize all Prometheus metrics before starting the metrics server. + // This ensures metrics are registered even if no senders have escrow accounts + // or pending allocations at startup. + agent::init_metrics(); + tracing::info!("Prometheus metrics initialized"); + let (manager, handler) = agent::start_agent() .await .with_context(|| "Failed to start TAP agent")?; From 0c7e11e6fffe7738a022749dc387e31aa6c8213f Mon Sep 17 00:00:00 2001 From: Joseph Livesey Date: Sat, 6 Dec 2025 09:51:08 -0500 Subject: [PATCH 2/2] fix(tap-agent): clean up Prometheus gauges when actors stop Clean up sender and allocation-level gauge metrics when actors stop to prevent stale values from accumulating in Prometheus. Changes: - Clean up UNAGGREGATED_FEES_BY_VERSION gauge on allocation stop - Clean up INVALID_RECEIPT_FEES gauge on allocation stop - Clean up sender-level gauges (SENDER_DENIED, ESCROW_BALANCE, SENDER_FEE_TRACKER, MAX_FEE_PER_SENDER, RAV_REQUEST_TRIGGER_VALUE) in SenderAccount post_stop - Add tests to verify metric cleanup behavior --- crates/tap-agent/src/agent/sender_account.rs | 238 ++++++++++++++++++- 1 file changed, 237 insertions(+), 1 deletion(-) diff --git a/crates/tap-agent/src/agent/sender_account.rs b/crates/tap-agent/src/agent/sender_account.rs index ca21fa616..13fa3857f 100644 --- a/crates/tap-agent/src/agent/sender_account.rs +++ b/crates/tap-agent/src/agent/sender_account.rs @@ -1690,6 +1690,18 @@ impl Actor for SenderAccount { let _ = UNAGGREGATED_FEES .remove_label_values(&[&state.sender.to_string(), &allocation_id.to_string()]); + let version = match state.sender_type { + crate::agent::sender_accounts_manager::SenderType::Legacy => TAP_V1, + crate::agent::sender_accounts_manager::SenderType::Horizon => TAP_V2, + }; + let _ = UNAGGREGATED_FEES_BY_VERSION.remove_label_values(&[ + &state.sender.to_string(), + &allocation_id.to_string(), + version, + ]); + let _ = INVALID_RECEIPT_FEES + .remove_label_values(&[&state.sender.to_string(), &allocation_id.to_string()]); + // Check for deny conditions - look up correct allocation variant from state let allocation_enum = state .allocation_ids @@ -1770,6 +1782,15 @@ impl Actor for SenderAccount { if let Some(handle) = state.reconciliation_handle.take() { handle.abort(); } + + // Clean up sender-level metrics to avoid stale gauge values + let sender_label = state.sender.to_string(); + let _ = SENDER_DENIED.remove_label_values(&[&sender_label]); + let _ = ESCROW_BALANCE.remove_label_values(&[&sender_label]); + let _ = SENDER_FEE_TRACKER.remove_label_values(&[&sender_label]); + let _ = MAX_FEE_PER_SENDER.remove_label_values(&[&sender_label]); + let _ = RAV_REQUEST_TRIGGER_VALUE.remove_label_values(&[&sender_label]); + Ok(()) } } @@ -1853,7 +1874,11 @@ pub mod tests { Mock, MockServer, ResponseTemplate, }; - use super::{RavInformation, SenderAccountMessage, ALLOCATION_RECONCILIATION_RUNS}; + use super::{ + RavInformation, SenderAccountMessage, ALLOCATION_RECONCILIATION_RUNS, ESCROW_BALANCE, + INVALID_RECEIPT_FEES, MAX_FEE_PER_SENDER, RAV_REQUEST_TRIGGER_VALUE, SENDER_DENIED, + SENDER_FEE_TRACKER, TAP_V1, UNAGGREGATED_FEES_BY_VERSION, + }; use crate::{ agent::{ sender_account::ReceiptFees, sender_accounts_manager::AllocationId, @@ -3118,4 +3143,215 @@ pub mod tests { sender_account.stop_and_wait(None, None).await.unwrap(); } + + /// Test that UNAGGREGATED_FEES_BY_VERSION metric is cleaned up when allocation stops + /// + /// This test verifies the fix for stale gauge metrics that were introduced in the + /// Horizon V2 TAP support commit. Previously, UNAGGREGATED_FEES_BY_VERSION was set + /// but never cleaned up when allocations closed, leaving stale values in Prometheus. + #[tokio::test] + async fn test_unaggregated_fees_by_version_cleanup_on_allocation_stop() { + // Use a unique allocation ID for this test to avoid interference from other tests + // (prometheus metrics are global/shared) + let unique_allocation = test_assets::ALLOCATION_ID_1; + + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, prefix, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + // Create a mock sender allocation and link it to the sender account + let (mock_sender_allocation, _, next_unaggregated_fees) = + MockSenderAllocation::new_with_triggered_rav_request(sender_account.clone()); + + let name = format!("{}:{}:{}", prefix, SENDER.1, unique_allocation); + let (allocation, _) = MockSenderAllocation::spawn_linked( + Some(name), + mock_sender_allocation, + (), + sender_account.get_cell(), + ) + .await + .unwrap(); + + // Send unaggregated fees to trigger metric set + next_unaggregated_fees.send(1000).unwrap(); + + // Directly set the metric to simulate the value being recorded + // (We do this because the actual message flow is complex and depends on + // allocation state being properly set up) + let sender_label = SENDER.1.to_string(); + let allocation_label = unique_allocation.to_string(); + UNAGGREGATED_FEES_BY_VERSION + .with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .set(1000.0); + + // Verify metric was set + let metric_value = UNAGGREGATED_FEES_BY_VERSION + .get_metric_with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .expect("Metric should exist after being set") + .get(); + assert_eq!( + metric_value, 1000.0, + "Metric should have value 1000.0 after set, got {metric_value}" + ); + + // Stop the allocation - this should trigger ActorTerminated supervision event + // which in turn should clean up the metric + allocation.stop_and_wait(None, None).await.unwrap(); + + // Give time for supervision event to be processed + flush_messages(&mut msg_receiver).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + // Verify metric was cleaned up. After remove_label_values, get_metric_with_label_values + // creates a NEW metric with default value 0. So the value changing from 1000 to 0 + // proves the old metric was removed. + // See: https://docs.rs/prometheus/latest/prometheus/core/struct.MetricVec.html + let metric_value_after = UNAGGREGATED_FEES_BY_VERSION + .with_label_values(&[&sender_label, &allocation_label, TAP_V1]) + .get(); + assert_eq!( + metric_value_after, 0.0, + "Metric should be 0 after removal (old value was 1000), got {metric_value_after}" + ); + + sender_account.stop_and_wait(None, None).await.unwrap(); + } + + /// Test that INVALID_RECEIPT_FEES metric is cleaned up when allocation stops + #[tokio::test] + async fn test_invalid_receipt_fees_cleanup_on_allocation_stop() { + let unique_allocation = test_assets::ALLOCATION_ID_1; + + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, prefix, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + let (mock_sender_allocation, _, next_unaggregated_fees) = + MockSenderAllocation::new_with_triggered_rav_request(sender_account.clone()); + + let name = format!("{}:{}:{}", prefix, SENDER.1, unique_allocation); + let (allocation, _) = MockSenderAllocation::spawn_linked( + Some(name), + mock_sender_allocation, + (), + sender_account.get_cell(), + ) + .await + .unwrap(); + + next_unaggregated_fees.send(1000).unwrap(); + + let sender_label = SENDER.1.to_string(); + let allocation_label = unique_allocation.to_string(); + INVALID_RECEIPT_FEES + .with_label_values(&[&sender_label, &allocation_label]) + .set(500.0); + + let metric_value = INVALID_RECEIPT_FEES + .get_metric_with_label_values(&[&sender_label, &allocation_label]) + .expect("Metric should exist after being set") + .get(); + assert_eq!( + metric_value, 500.0, + "Metric should have value 500.0 after set, got {metric_value}" + ); + + allocation.stop_and_wait(None, None).await.unwrap(); + + flush_messages(&mut msg_receiver).await; + tokio::time::sleep(Duration::from_millis(100)).await; + + let metric_value_after = INVALID_RECEIPT_FEES + .with_label_values(&[&sender_label, &allocation_label]) + .get(); + assert_eq!( + metric_value_after, 0.0, + "Metric should be 0 after removal (old value was 500), got {metric_value_after}" + ); + + sender_account.stop_and_wait(None, None).await.unwrap(); + } + + /// Test that sender-level metrics are cleaned up when SenderAccount stops + #[tokio::test] + async fn test_sender_level_gauges_cleanup_on_post_stop() { + let test_db = test_assets::setup_shared_test_db().await; + let pgpool = test_db.pool; + + let (sender_account, mut msg_receiver, _, _, _) = + create_sender_account().pgpool(pgpool).call().await; + + flush_messages(&mut msg_receiver).await; + + let sender_label = SENDER.1.to_string(); + + // Set all sender-level metrics to non-zero values + SENDER_DENIED.with_label_values(&[&sender_label]).set(1); + ESCROW_BALANCE + .with_label_values(&[&sender_label]) + .set(1000.0); + SENDER_FEE_TRACKER + .with_label_values(&[&sender_label]) + .set(500.0); + MAX_FEE_PER_SENDER + .with_label_values(&[&sender_label]) + .set(2000.0); + RAV_REQUEST_TRIGGER_VALUE + .with_label_values(&[&sender_label]) + .set(100.0); + + // Verify metrics were set + assert_eq!( + SENDER_DENIED + .get_metric_with_label_values(&[&sender_label]) + .unwrap() + .get(), + 1 + ); + assert_eq!( + ESCROW_BALANCE + .get_metric_with_label_values(&[&sender_label]) + .unwrap() + .get(), + 1000.0 + ); + + // Stop sender account - this triggers post_stop which should clean up metrics + sender_account.stop_and_wait(None, None).await.unwrap(); + tokio::time::sleep(Duration::from_millis(100)).await; + + // Verify all sender-level metrics were cleaned up + assert_eq!( + SENDER_DENIED.with_label_values(&[&sender_label]).get(), + 0, + "SENDER_DENIED should be 0 after cleanup" + ); + assert_eq!( + ESCROW_BALANCE.with_label_values(&[&sender_label]).get(), + 0.0, + "ESCROW_BALANCE should be 0 after cleanup" + ); + assert_eq!( + SENDER_FEE_TRACKER.with_label_values(&[&sender_label]).get(), + 0.0, + "SENDER_FEE_TRACKER should be 0 after cleanup" + ); + assert_eq!( + MAX_FEE_PER_SENDER.with_label_values(&[&sender_label]).get(), + 0.0, + "MAX_FEE_PER_SENDER should be 0 after cleanup" + ); + assert_eq!( + RAV_REQUEST_TRIGGER_VALUE + .with_label_values(&[&sender_label]) + .get(), + 0.0, + "RAV_REQUEST_TRIGGER_VALUE should be 0 after cleanup" + ); + } }