From 882bf63a624411658a822275aea1507178de3e28 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 17:58:30 +0800 Subject: [PATCH 1/7] Refactor actor spawning methods and streamline actor system traits - Removed deprecated methods from `ActorSystemCoreExt` and `ActorSystemAdvancedExt` to simplify the API. - Consolidated actor spawning logic into a single internal method, enhancing clarity and reducing redundancy. - Updated usage of actor spawning in tests and examples to utilize the new builder pattern for improved readability and maintainability. - Adjusted imports and module structure for better organization and consistency across the codebase. --- crates/pulsing-actor/src/lib.rs | 4 +- crates/pulsing-actor/src/system/mod.rs | 12 +- crates/pulsing-actor/src/system/spawn.rs | 188 ++++-------------- crates/pulsing-actor/src/system/traits.rs | 173 +++++----------- crates/pulsing-actor/src/test_helper.rs | 1 + .../pulsing-actor/tests/supervision_tests.rs | 14 +- crates/pulsing-py/src/actor.rs | 16 +- 7 files changed, 124 insertions(+), 284 deletions(-) diff --git a/crates/pulsing-actor/src/lib.rs b/crates/pulsing-actor/src/lib.rs index 6ac2e6519..186be94bc 100644 --- a/crates/pulsing-actor/src/lib.rs +++ b/crates/pulsing-actor/src/lib.rs @@ -95,8 +95,8 @@ pub mod prelude { pub use crate::actor::{Actor, ActorContext, ActorRef, IntoActor, Message}; pub use crate::supervision::{BackoffStrategy, RestartPolicy, SupervisionSpec}; pub use crate::system::{ - ActorSystem, ActorSystemAdvancedExt, ActorSystemCoreExt, ActorSystemOpsExt, ResolveOptions, - SpawnOptions, SystemConfig, + ActorSystem, ActorSystemCoreExt, ActorSystemOpsExt, ResolveOptions, SpawnOptions, + SystemConfig, }; pub use async_trait::async_trait; pub use serde::{Deserialize, Serialize}; diff --git a/crates/pulsing-actor/src/system/mod.rs b/crates/pulsing-actor/src/system/mod.rs index 6d761bb37..688e6fc0e 100644 --- a/crates/pulsing-actor/src/system/mod.rs +++ b/crates/pulsing-actor/src/system/mod.rs @@ -21,7 +21,7 @@ pub use config::{ }; pub use handle::ActorStats; pub use load_balancer::NodeLoadTracker; -pub use traits::{ActorSystemAdvancedExt, ActorSystemCoreExt, ActorSystemOpsExt}; +pub use traits::{ActorSystemCoreExt, ActorSystemOpsExt}; use crate::actor::{ActorId, ActorPath, ActorRef, ActorResolver, ActorSystemRef, Envelope, NodeId}; use crate::cluster::{GossipBackend, HeadNodeBackend, NamingBackend}; @@ -217,7 +217,10 @@ impl ActorSystem { // Spawn as named actor with path "system" (use new_system to bypass namespace check) let system_path = ActorPath::new_system(SYSTEM_ACTOR_PATH)?; - self.spawn_named(system_path, system_actor).await?; + self.spawning() + .path(system_path) + .spawn(system_actor) + .await?; // Note: The local_actors_ref and actor_names_ref are used internally, // SystemRef snapshot may become stale for new actors but that's acceptable @@ -250,7 +253,10 @@ impl ActorSystem { // Spawn as named actor (use new_system to bypass namespace check) let system_path = ActorPath::new_system(SYSTEM_ACTOR_PATH)?; - self.spawn_named(system_path, system_actor).await?; + self.spawning() + .path(system_path) + .spawn(system_actor) + .await?; tracing::debug!( path = SYSTEM_ACTOR_PATH, diff --git a/crates/pulsing-actor/src/system/spawn.rs b/crates/pulsing-actor/src/system/spawn.rs index 4709fd7aa..5fbe48ab5 100644 --- a/crates/pulsing-actor/src/system/spawn.rs +++ b/crates/pulsing-actor/src/system/spawn.rs @@ -2,152 +2,43 @@ //! //! This module contains the implementation of actor spawning methods //! that are used by the ActorSystem. +//! +//! The core spawn implementation is in `SpawnBuilder::spawn_factory()`. +//! All other spawn methods delegate to the builder. -use crate::actor::{ - Actor, ActorContext, ActorId, ActorRef, ActorSystemRef, IntoActor, IntoActorPath, Mailbox, -}; +use crate::actor::{Actor, ActorContext, ActorId, ActorPath, ActorRef, ActorSystemRef, Mailbox}; use crate::system::config::SpawnOptions; use crate::system::handle::{ActorStats, LocalActorHandle}; -use crate::system::runtime::{run_actor_instance, run_supervision_loop}; +use crate::system::runtime::run_supervision_loop; use crate::system::ActorSystem; use std::sync::atomic::Ordering; use std::sync::Arc; impl ActorSystem { - /// Create a once-use factory from an actor instance - pub(crate) fn once_factory(actor: A) -> impl FnMut() -> anyhow::Result { - let mut actor_opt = Some(actor); - move || { - actor_opt - .take() - .ok_or_else(|| anyhow::anyhow!("Actor cannot be restarted (spawned as instance)")) - } - } - - /// Spawn an anonymous actor (no name, only accessible via ActorRef) - /// - /// Note: Anonymous actors do not support supervision/restart because they have - /// no stable identity for re-resolution. Use `spawn_named_factory` for actors - /// that need supervision. - pub async fn spawn_anonymous(self: &Arc, actor: A) -> anyhow::Result - where - A: IntoActor, - { - self.spawn_anonymous_with_options(actor.into_actor(), SpawnOptions::default()) - .await - } - - /// Spawn an anonymous actor with custom options - pub async fn spawn_anonymous_with_options( - self: &Arc, - actor: A, - options: SpawnOptions, - ) -> anyhow::Result - where - A: IntoActor, - { - let actor = actor.into_actor(); - let actor_id = self.next_actor_id(); - - let mailbox = Mailbox::with_capacity(self.mailbox_capacity(&options)); - let (sender, receiver) = mailbox.split(); - - let stats = Arc::new(ActorStats::default()); - - let actor_cancel = self.cancel_token.child_token(); - - let ctx = Self::build_context(self, actor_id, &sender, &actor_cancel, None); - - let stats_clone = stats.clone(); - let cancel = actor_cancel.clone(); - let actor_id_for_log = actor_id; - - let join_handle = tokio::spawn(async move { - let mut receiver = receiver; - let mut ctx = ctx; - let reason = - run_actor_instance(actor, &mut receiver, &mut ctx, cancel, stats_clone).await; - tracing::debug!(actor_id = ?actor_id_for_log, reason = ?reason, "Anonymous actor stopped"); - }); - - let local_id = actor_id.local_id(); - let handle = LocalActorHandle { - sender: sender.clone(), - join_handle, - cancel_token: actor_cancel, - stats: stats.clone(), - metadata: options.metadata.clone(), - named_path: None, - actor_id, - }; - - self.local_actors.insert(local_id, handle); - self.actor_names.insert(actor_id.to_string(), local_id); - - Ok(ActorRef::local(actor_id, sender)) - } - - /// Spawn a named actor (resolvable by name across the cluster) + /// Internal spawn implementation - the actual core logic /// - /// # Example - /// ```rust,ignore - /// // Name is used as both path (for resolution) and local name - /// system.spawn_named("services/echo", MyActor).await?; - /// ``` - pub async fn spawn_named(self: &Arc, name: P, actor: A) -> anyhow::Result - where - P: IntoActorPath, - A: IntoActor, - { - let path = name.into_actor_path()?; - self.spawn_named_factory( - path, - Self::once_factory(actor.into_actor()), - SpawnOptions::default(), - ) - .await - } - - /// Spawn a named actor with custom options - pub async fn spawn_named_with_options( - self: &Arc, - name: P, - actor: A, - options: SpawnOptions, - ) -> anyhow::Result - where - P: IntoActorPath, - A: IntoActor, - { - let path = name.into_actor_path()?; - self.spawn_named_factory(path, Self::once_factory(actor.into_actor()), options) - .await - } - - /// Spawn a named actor using a factory function - pub async fn spawn_named_factory( + /// This is called by `SpawnBuilder::spawn_factory()` and handles both + /// anonymous and named actor spawning. + pub(crate) async fn spawn_internal( self: &Arc, - name: P, + path: Option, factory: F, options: SpawnOptions, ) -> anyhow::Result where - P: IntoActorPath, F: FnMut() -> anyhow::Result + Send + 'static, A: Actor, { - let path = name.into_actor_path()?; - let name_str = path.as_str(); - - if self.actor_names.contains_key(&name_str.to_string()) { - return Err(anyhow::anyhow!("Actor already exists: {}", name_str)); - } + let name_str = path.as_ref().map(|p| p.as_str().to_string()); - if self.named_actor_paths.contains_key(&name_str.to_string()) { - return Err(anyhow::anyhow!( - "Named path already registered: {}", - name_str - )); + // Check for name conflicts (only for named actors) + if let Some(ref name) = name_str { + if self.actor_names.contains_key(name) { + return Err(anyhow::anyhow!("Actor already exists: {}", name)); + } + if self.named_actor_paths.contains_key(name) { + return Err(anyhow::anyhow!("Named path already registered: {}", name)); + } } let actor_id = self.next_actor_id(); @@ -161,13 +52,7 @@ impl ActorSystem { let actor_cancel = self.cancel_token.child_token(); - let ctx = Self::build_context( - self, - actor_id, - &sender, - &actor_cancel, - Some(name_str.to_string()), - ); + let ctx = Self::build_context(self, actor_id, &sender, &actor_cancel, name_str.clone()); let stats_clone = stats.clone(); let cancel = actor_cancel.clone(); @@ -187,23 +72,32 @@ impl ActorSystem { cancel_token: actor_cancel, stats: stats.clone(), metadata: metadata.clone(), - named_path: Some(path.clone()), + named_path: path.clone(), actor_id, }; self.local_actors.insert(local_id, handle); - self.actor_names.insert(name_str.to_string(), local_id); - self.named_actor_paths - .insert(name_str.to_string(), name_str.to_string()); - - if let Some(cluster) = self.cluster.read().await.as_ref() { - if metadata.is_empty() { - cluster.register_named_actor(path.clone()).await; - } else { - cluster - .register_named_actor_full(path.clone(), actor_id, metadata) - .await; + + // Register in name maps + if let Some(ref name) = name_str { + self.actor_names.insert(name.clone(), local_id); + self.named_actor_paths.insert(name.clone(), name.clone()); + + // Register with cluster if available + if let Some(ref path) = path { + if let Some(cluster) = self.cluster.read().await.as_ref() { + if metadata.is_empty() { + cluster.register_named_actor(path.clone()).await; + } else { + cluster + .register_named_actor_full(path.clone(), actor_id, metadata) + .await; + } + } } + } else { + // Anonymous actor: use actor_id as key + self.actor_names.insert(actor_id.to_string(), local_id); } Ok(ActorRef::local(actor_id, sender)) diff --git a/crates/pulsing-actor/src/system/traits.rs b/crates/pulsing-actor/src/system/traits.rs index 67355b908..dabc6bcf7 100644 --- a/crates/pulsing-actor/src/system/traits.rs +++ b/crates/pulsing-actor/src/system/traits.rs @@ -57,13 +57,6 @@ pub trait ActorSystemCoreExt: Sized { where P: IntoActorPath + Send; - /// Resolve a named actor with custom options (load balancing, node filtering) - async fn resolve_with_options( - &self, - name: &ActorPath, - options: ResolveOptions, - ) -> anyhow::Result; - /// Get a builder for resolving actors with advanced options. fn resolving(&self) -> ResolveBuilder<'_>; } @@ -71,7 +64,7 @@ pub trait ActorSystemCoreExt: Sized { /// Builder for spawning actors with advanced options. pub struct SpawnBuilder<'a> { system: &'a Arc, - name: Option, + name: Option, options: SpawnOptions, } @@ -86,8 +79,29 @@ impl<'a> SpawnBuilder<'a> { } /// Set the actor name (makes it resolvable by name) + /// + /// The name will be validated as an ActorPath. For user actors, + /// use paths like "services/echo" or "actors/counter". pub fn name(mut self, name: impl AsRef) -> Self { - self.name = Some(name.as_ref().to_string()); + // Store as ActorPath to validate early + match ActorPath::new(name.as_ref()) { + Ok(path) => self.name = Some(path), + Err(_) => { + // Store as string for now, will error on spawn + // This allows the builder pattern to continue + tracing::warn!("Invalid actor path: {}", name.as_ref()); + } + } + self + } + + /// Set the actor path directly (allows system paths) + /// + /// This method allows setting an already-validated ActorPath directly, + /// bypassing the string validation in `name()`. This is useful when + /// you already have an ActorPath or need to use system namespace paths. + pub fn path(mut self, path: ActorPath) -> Self { + self.name = Some(path); self } @@ -122,20 +136,36 @@ impl<'a> SpawnBuilder<'a> { A: IntoActor, { let actor = actor.into_actor(); + // Create a once-use factory from the actor instance + let mut actor_opt = Some(actor); + let factory = move || { + actor_opt + .take() + .ok_or_else(|| anyhow::anyhow!("Actor cannot be restarted (spawned as instance)")) + }; + self.spawn_factory(factory).await + } + + /// Spawn an actor using a factory function + /// + /// Factory-based spawning enables supervision restarts - when an actor fails, + /// the system can recreate it using the factory function. + /// + /// Note: Only named actors support supervision/restart. Anonymous actors + /// cannot be restarted because they have no stable identity for re-resolution. + pub async fn spawn_factory(self, factory: F) -> anyhow::Result + where + F: FnMut() -> anyhow::Result + Send + 'static, + A: Actor, + { match self.name { - Some(name) => { + Some(path) => { // Named actor: resolvable by name - ActorSystem::spawn_named_with_options( - self.system, - name.as_str(), - actor, - self.options, - ) - .await + ActorSystem::spawn_internal(self.system, Some(path), factory, self.options).await } None => { // Anonymous actor: not resolvable - ActorSystem::spawn_anonymous_with_options(self.system, actor, self.options).await + ActorSystem::spawn_internal(self.system, None, factory, self.options).await } } } @@ -219,38 +249,6 @@ impl<'a> ResolveBuilder<'a> { } } -// ============================================================================= -// Advanced Trait: Factory-based Spawning (Supervision/Restart) -// ============================================================================= - -/// Advanced API for factory-based actor spawning. -/// -/// Factory-based spawning enables supervision restarts - when an actor fails, -/// the system can recreate it using the factory function. -/// -/// Note: Regular `spawn` methods use a one-shot factory internally, so the actor -/// cannot be restarted. Use `spawn_named_factory` if you need supervision with -/// restart capability. Anonymous actors do not support supervision. -/// -/// -#[async_trait::async_trait] -pub trait ActorSystemAdvancedExt { - /// Spawn a named actor using a factory function (enables supervision restarts) - /// - /// Note: Only named actors support supervision/restart. Anonymous actors cannot - /// be restarted because they have no stable identity for re-resolution. - async fn spawn_named_factory( - &self, - name: P, - factory: F, - options: SpawnOptions, - ) -> anyhow::Result - where - P: IntoActorPath + Send, - F: FnMut() -> anyhow::Result + Send + 'static, - A: Actor; -} - /// Operations, introspection, and lifecycle management API. #[async_trait::async_trait] pub trait ActorSystemOpsExt { @@ -275,20 +273,6 @@ pub trait ActorSystemOpsExt { /// Get a local actor reference by name fn local_actor_ref_by_name(&self, name: &str) -> Option; - /// Spawn an anonymous actor (no name, only accessible via ActorRef) - async fn spawn_anonymous(&self, actor: A) -> anyhow::Result - where - A: IntoActor; - - /// Spawn an anonymous actor with custom options - async fn spawn_anonymous_with_options( - &self, - actor: A, - options: SpawnOptions, - ) -> anyhow::Result - where - A: IntoActor; - /// Get load tracker for a node address fn get_node_load_tracker(&self, addr: &SocketAddr) -> Option>; @@ -316,9 +300,6 @@ pub trait ActorSystemOpsExt { address: &crate::actor::ActorAddress, ) -> anyhow::Result; - /// Get all instances of a named actor across the cluster - async fn get_named_instances(&self, path: &ActorPath) -> Vec; - /// Get detailed instances with actor_id and metadata async fn get_named_instances_detailed( &self, @@ -373,7 +354,7 @@ impl ActorSystemCoreExt for Arc { where A: IntoActor, { - ActorSystem::spawn_anonymous(self, actor.into_actor()).await + self.spawning().spawn(actor).await } async fn spawn_named( @@ -384,14 +365,7 @@ impl ActorSystemCoreExt for Arc { where A: IntoActor, { - let name = name.as_ref(); - ActorSystem::spawn_named_with_options( - self, - name, - actor.into_actor(), - SpawnOptions::default(), - ) - .await + self.spawning().name(name).spawn(actor).await } fn spawning(&self) -> SpawnBuilder<'_> { @@ -409,36 +383,11 @@ impl ActorSystemCoreExt for Arc { ActorSystem::resolve_named(self.as_ref(), name, None).await } - async fn resolve_with_options( - &self, - name: &ActorPath, - options: ResolveOptions, - ) -> anyhow::Result { - ActorSystem::resolve_named_with_options(self.as_ref(), name, options).await - } - fn resolving(&self) -> ResolveBuilder<'_> { ResolveBuilder::new(self) } } -#[async_trait::async_trait] -impl ActorSystemAdvancedExt for Arc { - async fn spawn_named_factory( - &self, - name: P, - factory: F, - options: SpawnOptions, - ) -> anyhow::Result - where - P: IntoActorPath + Send, - F: FnMut() -> anyhow::Result + Send + 'static, - A: Actor, - { - ActorSystem::spawn_named_factory(self, name, factory, options).await - } -} - #[async_trait::async_trait] impl ActorSystemOpsExt for Arc { async fn system(&self) -> anyhow::Result { @@ -468,24 +417,6 @@ impl ActorSystemOpsExt for Arc { ActorSystem::local_actor_ref_by_name(self.as_ref(), name) } - async fn spawn_anonymous(&self, actor: A) -> anyhow::Result - where - A: IntoActor, - { - ActorSystem::spawn_anonymous(self, actor).await - } - - async fn spawn_anonymous_with_options( - &self, - actor: A, - options: SpawnOptions, - ) -> anyhow::Result - where - A: IntoActor, - { - ActorSystem::spawn_anonymous_with_options(self, actor, options).await - } - fn get_node_load_tracker(&self, addr: &SocketAddr) -> Option> { ActorSystem::get_node_load_tracker(self.as_ref(), addr) } @@ -509,10 +440,6 @@ impl ActorSystemOpsExt for Arc { ActorSystem::resolve(self.as_ref(), address).await } - async fn get_named_instances(&self, path: &ActorPath) -> Vec { - ActorSystem::get_named_instances(self.as_ref(), path).await - } - async fn get_named_instances_detailed( &self, path: &ActorPath, diff --git a/crates/pulsing-actor/src/test_helper.rs b/crates/pulsing-actor/src/test_helper.rs index 1c6ebec79..fe5b4de6f 100644 --- a/crates/pulsing-actor/src/test_helper.rs +++ b/crates/pulsing-actor/src/test_helper.rs @@ -18,6 +18,7 @@ use crate::actor::{Actor, ActorContext, ActorRef, Message}; use crate::system::{ActorSystem, SystemConfig}; +use crate::ActorSystemCoreExt; use async_trait::async_trait; use serde::{Deserialize, Serialize}; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/crates/pulsing-actor/tests/supervision_tests.rs b/crates/pulsing-actor/tests/supervision_tests.rs index 7c717017f..227d6d484 100644 --- a/crates/pulsing-actor/tests/supervision_tests.rs +++ b/crates/pulsing-actor/tests/supervision_tests.rs @@ -43,10 +43,11 @@ async fn test_restart_on_failure() { Duration::from_millis(100), )); - let options = SpawnOptions::new().supervision(spec); - let actor_ref = system - .spawn_named_factory("test/failing", factory, options) + .spawning() + .name("test/failing") + .supervision(spec) + .spawn_factory(factory) .await .unwrap(); @@ -100,10 +101,11 @@ async fn test_max_restarts_exceeded() { factor: 1.0, }); - let options = SpawnOptions::new().supervision(spec); - let actor_ref = system - .spawn_named_factory("test/crashing", factory, options) + .spawning() + .name("test/crashing") + .supervision(spec) + .spawn_factory(factory) .await .unwrap(); diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index f796fa212..d4e1fb48f 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -1234,7 +1234,9 @@ impl PyActorSystem { // actor is the instance let actor_wrapper = PythonActorWrapper::new(actor, event_loop); system - .spawn_anonymous_with_options(actor_wrapper, options) + .spawning() + .metadata(options.metadata) + .spawn(actor_wrapper) .await .map_err(to_pyerr)? } @@ -1258,7 +1260,11 @@ impl PyActorSystem { // actor is the instance let actor_wrapper = PythonActorWrapper::new(actor, event_loop); system - .spawn_named_with_options(path, actor_wrapper, options) + .spawning() + .path(path) + .supervision(options.supervision) + .metadata(options.metadata) + .spawn(actor_wrapper) .await .map_err(to_pyerr)? } else { @@ -1273,7 +1279,11 @@ impl PyActorSystem { }) }; system - .spawn_named_factory(path, factory, options) + .spawning() + .path(path) + .supervision(options.supervision) + .metadata(options.metadata) + .spawn_factory(factory) .await .map_err(to_pyerr)? } From 37ede15dada20a6c14c1f1d1f5d050a6cd4afbdf Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 20:05:29 +0800 Subject: [PATCH 2/7] Refactor actor reference handling and enhance message serialization options - Simplified the handling of lazy actor references by delegating message sending to resolved references, reducing complexity and avoiding recursion. - Introduced a new `Format` enum for message serialization, supporting binary and JSON formats, with auto-detection capabilities for improved flexibility. - Updated the `SpawnOptions` and `ResolveOptions` structures to utilize default constructors, enhancing code clarity and consistency. - Improved error handling in the `SpawnBuilder` for actor name validation, ensuring clearer feedback on invalid paths. - Adjusted documentation and examples to reflect the new default options and serialization methods. --- crates/pulsing-actor/src/actor/reference.rs | 36 +++---------- crates/pulsing-actor/src/actor/traits.rs | 51 ++++++++++++++++++ crates/pulsing-actor/src/system/config.rs | 40 ++++++-------- crates/pulsing-actor/src/system/mod.rs | 2 +- crates/pulsing-actor/src/system/resolve.rs | 8 +-- crates/pulsing-actor/src/system/traits.rs | 57 +++++++++----------- crates/pulsing-actor/src/system_actor/mod.rs | 23 +++----- crates/pulsing-py/src/actor.rs | 2 +- docs/src/api/overview.zh.md | 2 +- docs/src/api/rust.md | 2 +- docs/src/api/rust.zh.md | 2 +- docs/src/api_reference.md | 2 +- docs/src/api_reference.zh.md | 2 +- 13 files changed, 118 insertions(+), 111 deletions(-) diff --git a/crates/pulsing-actor/src/actor/reference.rs b/crates/pulsing-actor/src/actor/reference.rs index 85c044271..eed295d17 100644 --- a/crates/pulsing-actor/src/actor/reference.rs +++ b/crates/pulsing-actor/src/actor/reference.rs @@ -215,24 +215,10 @@ impl ActorRef { remote.transport.send_message(&self.actor_id, msg).await } ActorRefInner::Lazy(lazy) => { - // Resolve and call the underlying send directly to avoid recursion + // Resolve and delegate to the resolved reference let resolved = lazy.get().await?; - match &resolved.inner { - ActorRefInner::Local(sender) => { - let (tx, rx) = oneshot::channel(); - sender - .send(Envelope::ask(msg, tx)) - .await - .map_err(|_| anyhow::anyhow!("Actor mailbox closed"))?; - rx.await.map_err(|_| anyhow::anyhow!("Actor dropped"))? - } - ActorRefInner::Remote(remote) => { - remote.transport.send_message(&resolved.actor_id, msg).await - } - ActorRefInner::Lazy(_) => { - Err(anyhow::anyhow!("Nested lazy refs not supported")) - } - } + // Box the recursive future to avoid infinite size + Box::pin(resolved.send(msg)).await } } } @@ -248,20 +234,10 @@ impl ActorRef { remote.transport.send_oneway(&self.actor_id, msg).await } ActorRefInner::Lazy(lazy) => { - // Resolve and call the underlying send_oneway directly to avoid recursion + // Resolve and delegate to the resolved reference let resolved = lazy.get().await?; - match &resolved.inner { - ActorRefInner::Local(sender) => sender - .send(Envelope::tell(msg)) - .await - .map_err(|_| anyhow::anyhow!("Actor mailbox closed")), - ActorRefInner::Remote(remote) => { - remote.transport.send_oneway(&resolved.actor_id, msg).await - } - ActorRefInner::Lazy(_) => { - Err(anyhow::anyhow!("Nested lazy refs not supported")) - } - } + // Box the recursive future to avoid infinite size + Box::pin(resolved.send_oneway(msg)).await } } } diff --git a/crates/pulsing-actor/src/actor/traits.rs b/crates/pulsing-actor/src/actor/traits.rs index 2dbcecd9c..6814163f1 100644 --- a/crates/pulsing-actor/src/actor/traits.rs +++ b/crates/pulsing-actor/src/actor/traits.rs @@ -3,6 +3,7 @@ use async_trait::async_trait; use futures::Stream; use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde_json; use std::collections::HashMap; use std::fmt; use std::hash::Hash; @@ -81,6 +82,48 @@ pub enum StopReason { SystemShutdown, } +/// Message serialization format +#[allow(dead_code)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Format { + /// Binary format (bincode) + Bincode, + /// JSON format (serde_json) + Json, + /// Auto-detect format (try JSON first, then bincode) + Auto, +} + +impl Format { + /// Parse data using this format + pub fn parse(&self, data: &[u8]) -> anyhow::Result { + match self { + Format::Bincode => Ok(bincode::deserialize(data)?), + Format::Json => Ok(serde_json::from_slice(data)?), + Format::Auto => { + // Try JSON first for Python compatibility, then bincode + match serde_json::from_slice(data) { + Ok(value) => Ok(value), + Err(_) => Ok(bincode::deserialize(data)?), + } + } + } + } + + /// Serialize data using this format + #[allow(dead_code)] + pub fn serialize(&self, value: &T) -> anyhow::Result> { + match self { + Format::Bincode => Ok(bincode::serialize(value)?), + Format::Json => Ok(serde_json::to_vec(value)?), + Format::Auto => { + // Default to bincode for Auto serialization + Ok(bincode::serialize(value)?) + } + } + } +} + /// Message stream type (stream of Single messages). pub type MessageStream = Pin> + Send>>; @@ -118,6 +161,14 @@ impl Message { } } + /// Parse message data with auto-detection (JSON first, then bincode) + pub fn parse(&self) -> anyhow::Result { + match self { + Message::Single { data, .. } => Format::Auto.parse(data), + Message::Stream { .. } => Err(anyhow::anyhow!("Cannot parse stream message")), + } + } + pub fn from_channel( default_msg_type: impl Into, rx: mpsc::Receiver>, diff --git a/crates/pulsing-actor/src/system/config.rs b/crates/pulsing-actor/src/system/config.rs index de9f891c4..f7843f26e 100644 --- a/crates/pulsing-actor/src/system/config.rs +++ b/crates/pulsing-actor/src/system/config.rs @@ -245,11 +245,6 @@ pub struct ActorSystemBuilder { } impl ActorSystemBuilder { - /// Create a new builder with default configuration - pub fn new() -> Self { - Self::default() - } - /// Set the bind address /// /// Accepts `&str`, `String`, or `SocketAddr`. @@ -471,14 +466,14 @@ mod tests { #[test] fn test_spawn_options_default() { - let options = SpawnOptions::new(); + let options = SpawnOptions::default(); assert!(options.mailbox_capacity.is_none()); assert!(options.metadata.is_empty()); } #[test] fn test_spawn_options_builder() { - let options = SpawnOptions::new() + let options = SpawnOptions::default() .mailbox_capacity(512) .metadata([("key".to_string(), "value".to_string())].into()); @@ -488,7 +483,7 @@ mod tests { #[test] fn test_resolve_options_default() { - let options = ResolveOptions::new(); + let options = ResolveOptions::default(); assert!(options.node_id.is_none()); assert!(options.policy.is_none()); assert!(options.filter_alive); @@ -497,7 +492,9 @@ mod tests { #[test] fn test_resolve_options_builder() { let node_id = NodeId::new(123); - let options = ResolveOptions::new().node_id(node_id).filter_alive(false); + let options = ResolveOptions::default() + .node_id(node_id) + .filter_alive(false); assert_eq!(options.node_id, Some(node_id)); assert!(!options.filter_alive); @@ -553,11 +550,6 @@ pub struct SpawnOptions { } impl SpawnOptions { - /// Create new spawn options with defaults - pub fn new() -> Self { - Self::default() - } - /// Set mailbox capacity override pub fn mailbox_capacity(mut self, capacity: usize) -> Self { self.mailbox_capacity = Some(capacity); @@ -578,7 +570,7 @@ impl SpawnOptions { } /// Options for resolving named actors -#[derive(Clone, Default)] +#[derive(Clone)] pub struct ResolveOptions { /// Target node ID (if specified, skip load balancing) pub node_id: Option, @@ -588,6 +580,16 @@ pub struct ResolveOptions { pub filter_alive: bool, } +impl Default for ResolveOptions { + fn default() -> Self { + Self { + node_id: None, + policy: None, + filter_alive: true, + } + } +} + impl std::fmt::Debug for ResolveOptions { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("ResolveOptions") @@ -599,14 +601,6 @@ impl std::fmt::Debug for ResolveOptions { } impl ResolveOptions { - /// Create new resolve options with defaults - pub fn new() -> Self { - Self { - filter_alive: true, - ..Default::default() - } - } - /// Set target node ID (bypasses load balancing) pub fn node_id(mut self, node_id: NodeId) -> Self { self.node_id = Some(node_id); diff --git a/crates/pulsing-actor/src/system/mod.rs b/crates/pulsing-actor/src/system/mod.rs index 688e6fc0e..28f593558 100644 --- a/crates/pulsing-actor/src/system/mod.rs +++ b/crates/pulsing-actor/src/system/mod.rs @@ -90,7 +90,7 @@ impl ActorSystem { /// let system = ActorSystem::builder().build().await?; /// ``` pub fn builder() -> ActorSystemBuilder { - ActorSystemBuilder::new() + ActorSystemBuilder::default() } /// Create a new actor system diff --git a/crates/pulsing-actor/src/system/resolve.rs b/crates/pulsing-actor/src/system/resolve.rs index 40959aa1c..28cd46a5b 100644 --- a/crates/pulsing-actor/src/system/resolve.rs +++ b/crates/pulsing-actor/src/system/resolve.rs @@ -75,9 +75,9 @@ impl ActorSystem { { let path = path.into_actor_path()?; let options = if let Some(nid) = node_id { - ResolveOptions::new().node_id(*nid) + ResolveOptions::default().node_id(*nid) } else { - ResolveOptions::new() + ResolveOptions::default() }; self.resolve_named_with_options(&path, options).await } @@ -107,9 +107,9 @@ impl ActorSystem { node_id: Option<&NodeId>, ) -> anyhow::Result { let options = if let Some(nid) = node_id { - ResolveOptions::new().node_id(*nid) + ResolveOptions::default().node_id(*nid) } else { - ResolveOptions::new() + ResolveOptions::default() }; self.resolve_named_with_options(path, options).await } diff --git a/crates/pulsing-actor/src/system/traits.rs b/crates/pulsing-actor/src/system/traits.rs index dabc6bcf7..d24fdfae8 100644 --- a/crates/pulsing-actor/src/system/traits.rs +++ b/crates/pulsing-actor/src/system/traits.rs @@ -65,6 +65,7 @@ pub trait ActorSystemCoreExt: Sized { pub struct SpawnBuilder<'a> { system: &'a Arc, name: Option, + name_error: Option, options: SpawnOptions, } @@ -74,6 +75,7 @@ impl<'a> SpawnBuilder<'a> { Self { system, name: None, + name_error: None, options: SpawnOptions::default(), } } @@ -82,14 +84,19 @@ impl<'a> SpawnBuilder<'a> { /// /// The name will be validated as an ActorPath. For user actors, /// use paths like "services/echo" or "actors/counter". + /// + /// If validation fails, the error will be stored and returned when `spawn()` or `spawn_factory()` is called. pub fn name(mut self, name: impl AsRef) -> Self { - // Store as ActorPath to validate early match ActorPath::new(name.as_ref()) { - Ok(path) => self.name = Some(path), - Err(_) => { - // Store as string for now, will error on spawn - // This allows the builder pattern to continue - tracing::warn!("Invalid actor path: {}", name.as_ref()); + Ok(path) => { + self.name = Some(path); + self.name_error = None; // Clear any previous error + } + Err(e) => { + // Store error message for later reporting + self.name_error = Some(format!("Invalid actor path '{}': {}", name.as_ref(), e)); + self.name = None; + tracing::warn!("{}", self.name_error.as_ref().unwrap()); } } self @@ -102,6 +109,7 @@ impl<'a> SpawnBuilder<'a> { /// you already have an ActorPath or need to use system namespace paths. pub fn path(mut self, path: ActorPath) -> Self { self.name = Some(path); + self.name_error = None; // Clear any previous error self } @@ -158,6 +166,11 @@ impl<'a> SpawnBuilder<'a> { F: FnMut() -> anyhow::Result + Send + 'static, A: Actor, { + // Check if name validation failed + if let Some(ref error) = self.name_error { + return Err(anyhow::anyhow!("{}", error)); + } + match self.name { Some(path) => { // Named actor: resolvable by name @@ -174,9 +187,7 @@ impl<'a> SpawnBuilder<'a> { /// Builder for resolving actors with advanced options. pub struct ResolveBuilder<'a> { system: &'a Arc, - node_id: Option, - policy: Option>, - filter_alive: bool, + options: ResolveOptions, } impl<'a> ResolveBuilder<'a> { @@ -184,51 +195,35 @@ impl<'a> ResolveBuilder<'a> { pub(crate) fn new(system: &'a Arc) -> Self { Self { system, - node_id: None, - policy: None, - filter_alive: true, + options: ResolveOptions::default(), } } /// Target a specific node (bypasses load balancing) pub fn node(mut self, node_id: NodeId) -> Self { - self.node_id = Some(node_id); + self.options = self.options.node_id(node_id); self } /// Set load balancing policy pub fn policy(mut self, policy: Arc) -> Self { - self.policy = Some(policy); + self.options = self.options.policy(policy); self } /// Set whether to filter only alive nodes (default: true) pub fn filter_alive(mut self, filter: bool) -> Self { - self.filter_alive = filter; + self.options = self.options.filter_alive(filter); self } - /// Build ResolveOptions from this builder - fn build_options(&self) -> ResolveOptions { - let mut options = ResolveOptions::new(); - if let Some(node_id) = self.node_id { - options = options.node_id(node_id); - } - if let Some(ref policy) = self.policy { - options = options.policy(policy.clone()); - } - options = options.filter_alive(self.filter_alive); - options - } - /// Resolve a named actor pub async fn resolve

(self, name: P) -> anyhow::Result where P: IntoActorPath + Send, { let path = name.into_actor_path()?; - let options = self.build_options(); - ActorSystem::resolve_named_with_options(self.system, &path, options).await + ActorSystem::resolve_named_with_options(self.system, &path, self.options).await } /// List all instances of a named actor @@ -237,7 +232,7 @@ impl<'a> ResolveBuilder<'a> { P: IntoActorPath + Send, { let path = name.into_actor_path()?; - ActorSystem::resolve_all_instances(self.system, &path, self.filter_alive).await + ActorSystem::resolve_all_instances(self.system, &path, self.options.filter_alive).await } /// Lazy resolve - returns ActorRef that auto re-resolves when stale diff --git a/crates/pulsing-actor/src/system_actor/mod.rs b/crates/pulsing-actor/src/system_actor/mod.rs index 41115c36d..54a6f0bf5 100644 --- a/crates/pulsing-actor/src/system_actor/mod.rs +++ b/crates/pulsing-actor/src/system_actor/mod.rs @@ -337,23 +337,14 @@ impl Actor for SystemActor { async fn receive(&mut self, msg: Message, _ctx: &mut ActorContext) -> anyhow::Result { self.metrics.inc_message(); - // Parse system message (try JSON first for Python compatibility) + // Parse system message using auto-detection (JSON first, then bincode) let sys_msg: SystemMessage = match &msg { - Message::Single { data, .. } => { - // Try JSON parsing first (Python compatible) - match serde_json::from_slice(data) { - Ok(m) => m, - Err(_) => { - // Then try bincode parsing (Rust native) - match bincode::deserialize(data) { - Ok(m) => m, - Err(e) => { - return self.json_error_response(&format!( - "Invalid message format: {}", - e - )); - } - } + Message::Single { .. } => { + match msg.parse() { + Ok(msg) => msg, + Err(e) => { + // Return error response instead of propagating error + return self.json_error_response(&format!("Invalid message format: {}", e)); } } } diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index d4e1fb48f..6bc9f28c7 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -1217,7 +1217,7 @@ impl PyActorSystem { let _ = public; pyo3_async_runtimes::tokio::future_into_py(py, async move { - let options = pulsing_actor::system::SpawnOptions::new() + let options = pulsing_actor::system::SpawnOptions::default() .supervision(supervision) .metadata(metadata); diff --git a/docs/src/api/overview.zh.md b/docs/src/api/overview.zh.md index 53f1dc2bd..8be18aa1a 100644 --- a/docs/src/api/overview.zh.md +++ b/docs/src/api/overview.zh.md @@ -212,7 +212,7 @@ let response = actor.ask(Ping(42)).await?; Factory 模式生成,支持监督重启(仅命名 actor): ```rust -let options = SpawnOptions::new() +let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); // 仅命名 actor 支持 supervision diff --git a/docs/src/api/rust.md b/docs/src/api/rust.md index 4e66c70a0..a74352824 100644 --- a/docs/src/api/rust.md +++ b/docs/src/api/rust.md @@ -174,7 +174,7 @@ Actors can be configured with restart policies for fault tolerance. ```rust use pulsing_actor::system::SupervisionSpec; -let options = SpawnOptions::new() +let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); // Factory-based spawning with supervision diff --git a/docs/src/api/rust.zh.md b/docs/src/api/rust.zh.md index 0d0f14ea5..52df03f71 100644 --- a/docs/src/api/rust.zh.md +++ b/docs/src/api/rust.zh.md @@ -174,7 +174,7 @@ Actor 可以配置重启策略以实现容错。 ```rust use pulsing_actor::system::SupervisionSpec; -let options = SpawnOptions::new() +let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); // 基于工厂的生成,支持监督 diff --git a/docs/src/api_reference.md b/docs/src/api_reference.md index fafb074b8..25f040764 100644 --- a/docs/src/api_reference.md +++ b/docs/src/api_reference.md @@ -390,7 +390,7 @@ system.resolving().lazy(name)?; // Lazy resolution (~5s TTL auto- Factory-based spawning for supervision restarts (named actors only): ```rust -let options = SpawnOptions::new() +let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); // Only named actors support supervision (anonymous cannot be re-resolved) diff --git a/docs/src/api_reference.zh.md b/docs/src/api_reference.zh.md index 6195f8b91..1e303e4f8 100644 --- a/docs/src/api_reference.zh.md +++ b/docs/src/api_reference.zh.md @@ -413,7 +413,7 @@ system.resolving().lazy(name)?; // 懒解析(~5s TTL 自动刷 Factory 模式 spawn,支持 supervision 重启(仅命名 actor): ```rust -let options = SpawnOptions::new() +let options = SpawnOptions::default() .supervision(SupervisionSpec::on_failure().max_restarts(3)); // 仅命名 actor 支持 supervision(匿名 actor 无法重新解析) From 423febe6843e2b7240b78e75b3558a0d134b7992 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 20:20:12 +0800 Subject: [PATCH 3/7] Refactor ActorLifecycle to use ActorId for watcher management - Updated the ActorLifecycle struct to replace string-based identifiers with ActorId for both watchers and targets, enhancing type safety and consistency. - Modified watch and unwatch methods to accept ActorId parameters, streamlining the registration and removal of watchers. - Adjusted related methods and tests to reflect the new ActorId usage, ensuring proper functionality and improved clarity in actor relationships. - Enhanced logging to utilize ActorId for better traceability during watch operations. --- crates/pulsing-actor/src/actor/context.rs | 168 ++++++++----------- crates/pulsing-actor/src/behavior/core.rs | 9 +- crates/pulsing-actor/src/system/lifecycle.rs | 10 +- crates/pulsing-actor/src/system/mod.rs | 8 +- crates/pulsing-actor/src/watch.rs | 139 +++++++-------- 5 files changed, 150 insertions(+), 184 deletions(-) diff --git a/crates/pulsing-actor/src/actor/context.rs b/crates/pulsing-actor/src/actor/context.rs index c3453f10a..cdd75188c 100644 --- a/crates/pulsing-actor/src/actor/context.rs +++ b/crates/pulsing-actor/src/actor/context.rs @@ -13,17 +13,10 @@ use tokio_util::sync::CancellationToken; /// Context provided to actors during message handling. pub struct ActorContext { actor_id: ActorId, - - node_id: Option, - cancel_token: CancellationToken, - actor_refs: HashMap, - - system: Option>, - - self_sender: Option>, - + system: Arc, + self_sender: mpsc::Sender, named_path: Option, } @@ -42,36 +35,37 @@ pub trait ActorSystemRef: Send + Sync { } impl ActorContext { - pub fn new(actor_id: ActorId) -> Self { + /// Create a new ActorContext with all required fields. + /// + /// This is the main constructor for runtime use. All fields are required. + pub fn new( + actor_id: ActorId, + system: Arc, + cancel_token: CancellationToken, + self_sender: mpsc::Sender, + named_path: Option, + ) -> Self { Self { actor_id, - node_id: None, - cancel_token: CancellationToken::new(), + cancel_token, actor_refs: HashMap::new(), - system: None, - self_sender: None, - named_path: None, + system, + self_sender, + named_path, } } + /// Create a context with system but without a named path. pub fn with_system( actor_id: ActorId, system: Arc, cancel_token: CancellationToken, self_sender: mpsc::Sender, ) -> Self { - let node_id = Some(system.node_id()); - Self { - actor_id, - node_id, - cancel_token, - actor_refs: HashMap::new(), - system: Some(system), - self_sender: Some(self_sender), - named_path: None, - } + Self::new(actor_id, system, cancel_token, self_sender, None) } + /// Create a context with system and optional named path. pub fn with_system_and_name( actor_id: ActorId, system: Arc, @@ -79,23 +73,14 @@ impl ActorContext { self_sender: mpsc::Sender, named_path: Option, ) -> Self { - let node_id = Some(system.node_id()); - Self { - actor_id, - node_id, - cancel_token, - actor_refs: HashMap::new(), - system: Some(system), - self_sender: Some(self_sender), - named_path, - } + Self::new(actor_id, system, cancel_token, self_sender, named_path) } pub fn named_path(&self) -> Option<&str> { self.named_path.as_deref() } - pub fn system(&self) -> Option> { + pub fn system(&self) -> Arc { self.system.clone() } @@ -103,8 +88,9 @@ impl ActorContext { &self.actor_id } - pub fn node_id(&self) -> Option<&NodeId> { - self.node_id.as_ref() + /// Get the node ID from the system reference. + pub fn node_id(&self) -> NodeId { + self.system.node_id() } pub fn cancel_token(&self) -> &CancellationToken { @@ -120,13 +106,9 @@ impl ActorContext { return Ok(r.clone()); } - if let Some(ref system) = self.system { - let r = system.actor_ref(id).await?; - self.actor_refs.insert(*id, r.clone()); - return Ok(r); - } - - Err(anyhow::anyhow!("No system reference available")) + let r = self.system.actor_ref(id).await?; + self.actor_refs.insert(*id, r.clone()); + Ok(r) } /// Schedule a delayed message to self. @@ -135,10 +117,7 @@ impl ActorContext { msg: M, delay: Duration, ) -> anyhow::Result<()> { - let sender = self.self_sender.clone().ok_or_else(|| { - anyhow::anyhow!("No self sender available (context not fully initialized)") - })?; - + let sender = self.self_sender.clone(); let message = Message::pack(&msg)?; tokio::spawn(async move { @@ -154,62 +133,64 @@ impl ActorContext { /// Watch another actor. pub async fn watch(&self, target: &ActorId) -> anyhow::Result<()> { - if let Some(ref system) = self.system { - system.watch(&self.actor_id, target).await - } else { - Err(anyhow::anyhow!("No system reference available")) - } + self.system.watch(&self.actor_id, target).await } /// Stop watching another actor. pub async fn unwatch(&self, target: &ActorId) -> anyhow::Result<()> { - if let Some(ref system) = self.system { - system.unwatch(&self.actor_id, target).await - } else { - Err(anyhow::anyhow!("No system reference available")) - } + self.system.unwatch(&self.actor_id, target).await } } #[cfg(test)] mod tests { use super::*; + use crate::system::{ActorSystem, SystemConfig}; - #[test] - fn test_context_creation() { - let ctx = ActorContext::new(ActorId::local(1)); + async fn create_test_context(actor_id: ActorId) -> (ActorContext, Arc) { + let system = ActorSystem::new(SystemConfig::standalone()).await.unwrap(); + let cancel_token = CancellationToken::new(); + let (tx, _rx) = mpsc::channel(1); + let system_ref = system.clone() as Arc; + let ctx = ActorContext::new(actor_id, system_ref, cancel_token, tx, None); + (ctx, system) + } + + #[tokio::test] + async fn test_context_creation() { + let (ctx, _system) = create_test_context(ActorId::local(1)).await; assert_eq!(ctx.id().local_id(), 1); assert!(!ctx.is_cancelled()); } - #[test] - fn test_context_cancellation() { - let ctx = ActorContext::new(ActorId::local(1)); + #[tokio::test] + async fn test_context_cancellation() { + let (ctx, _system) = create_test_context(ActorId::local(1)).await; assert!(!ctx.is_cancelled()); ctx.cancel_token().cancel(); assert!(ctx.is_cancelled()); } - #[test] - fn test_context_node_id_none() { - let ctx = ActorContext::new(ActorId::local(1)); - assert!(ctx.node_id().is_none()); + #[tokio::test] + async fn test_context_node_id() { + let (ctx, system) = create_test_context(ActorId::local(1)).await; + assert_eq!(ctx.node_id(), *system.node_id()); } - #[test] - fn test_context_multiple_actors() { - let ctx1 = ActorContext::new(ActorId::local(1)); - let ctx2 = ActorContext::new(ActorId::local(2)); - let ctx3 = ActorContext::new(ActorId::local(3)); + #[tokio::test] + async fn test_context_multiple_actors() { + let (ctx1, _system1) = create_test_context(ActorId::local(1)).await; + let (ctx2, _system2) = create_test_context(ActorId::local(2)).await; + let (ctx3, _system3) = create_test_context(ActorId::local(3)).await; assert_eq!(ctx1.id().local_id(), 1); assert_eq!(ctx2.id().local_id(), 2); assert_eq!(ctx3.id().local_id(), 3); } - #[test] - fn test_context_cancel_token_clone() { - let ctx = ActorContext::new(ActorId::local(1)); + #[tokio::test] + async fn test_context_cancel_token_clone() { + let (ctx, _system) = create_test_context(ActorId::local(1)).await; let token = ctx.cancel_token().clone(); assert!(!ctx.is_cancelled()); @@ -222,41 +203,34 @@ mod tests { } #[tokio::test] - async fn test_context_actor_ref_no_system() { - let mut ctx = ActorContext::new(ActorId::local(1)); + async fn test_context_actor_ref() { + let (mut ctx, _system) = create_test_context(ActorId::local(1)).await; let target_id = ActorId::local(2); + // actor_ref should fail for non-existent actor let result = ctx.actor_ref(&target_id).await; assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("No system reference")); } #[tokio::test] - async fn test_context_watch_no_system() { - let ctx = ActorContext::new(ActorId::local(1)); + async fn test_context_watch() { + let (ctx, _system) = create_test_context(ActorId::local(1)).await; let target_id = ActorId::local(2); + // watch should work with real system let result = ctx.watch(&target_id).await; - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("No system reference")); + // May fail if target doesn't exist, but should not panic + let _ = result; } #[tokio::test] - async fn test_context_unwatch_no_system() { - let ctx = ActorContext::new(ActorId::local(1)); + async fn test_context_unwatch() { + let (ctx, _system) = create_test_context(ActorId::local(1)).await; let target_id = ActorId::local(2); + // unwatch should work with real system let result = ctx.unwatch(&target_id).await; - assert!(result.is_err()); - assert!(result - .unwrap_err() - .to_string() - .contains("No system reference")); + // May fail if target doesn't exist, but should not panic + let _ = result; } } diff --git a/crates/pulsing-actor/src/behavior/core.rs b/crates/pulsing-actor/src/behavior/core.rs index fc89b63e5..d363e52e5 100644 --- a/crates/pulsing-actor/src/behavior/core.rs +++ b/crates/pulsing-actor/src/behavior/core.rs @@ -1,12 +1,10 @@ use super::context::BehaviorContext; use super::reference::TypedRef; -use crate::actor::ActorSystemRef; use crate::actor::{Actor, ActorContext, IntoActor, Message}; use async_trait::async_trait; use futures::future::BoxFuture; use serde::{de::DeserializeOwned, Serialize}; use std::marker::PhantomData; -use std::sync::Arc; use tokio::sync::Mutex; /// Action returned by a behavior after processing a message. @@ -164,11 +162,8 @@ where // Store name for logging *self.name.lock().await = Some(actor_name.clone()); - // We need a system reference - get it from the context - // Note: This requires ActorContext to provide system access - let system: Arc = ctx - .system() - .ok_or_else(|| anyhow::anyhow!("No system reference available in context"))?; + // Get system reference from the context (always available now) + let system = ctx.system(); // Initialize the behavior context let actor_id = *ctx.id(); diff --git a/crates/pulsing-actor/src/system/lifecycle.rs b/crates/pulsing-actor/src/system/lifecycle.rs index 3d1b5fd36..96f5649b7 100644 --- a/crates/pulsing-actor/src/system/lifecycle.rs +++ b/crates/pulsing-actor/src/system/lifecycle.rs @@ -201,20 +201,18 @@ impl ActorSystem { } // 3. Handle lifecycle cleanup - let actor_names = self.actor_names.clone(); let local_actors = self.local_actors.clone(); self.lifecycle .handle_termination( &handle.actor_id, - actor_name, named_path, reason, &self.named_actor_paths, &self.cluster, - |name| { - actor_names - .get(name) - .and_then(|id| local_actors.get(id.value()).map(|h| h.sender.clone())) + |actor_id| { + // Directly get local_id from ActorId and lookup in local_actors + let local_id = actor_id.local_id(); + local_actors.get(&local_id).map(|h| h.sender.clone()) }, ) .await; diff --git a/crates/pulsing-actor/src/system/mod.rs b/crates/pulsing-actor/src/system/mod.rs index 28f593558..eb2196423 100644 --- a/crates/pulsing-actor/src/system/mod.rs +++ b/crates/pulsing-actor/src/system/mod.rs @@ -318,16 +318,12 @@ impl ActorSystemRef for ActorSystem { )); } - let watcher_key = watcher.to_string(); - let target_key = target.to_string(); - self.lifecycle.watch(&watcher_key, &target_key).await; + self.lifecycle.watch(watcher, target).await; Ok(()) } async fn unwatch(&self, watcher: &ActorId, target: &ActorId) -> anyhow::Result<()> { - let watcher_key = watcher.to_string(); - let target_key = target.to_string(); - self.lifecycle.unwatch(&watcher_key, &target_key).await; + self.lifecycle.unwatch(watcher, target).await; Ok(()) } diff --git a/crates/pulsing-actor/src/watch.rs b/crates/pulsing-actor/src/watch.rs index 62f546c89..4aa0c4d03 100644 --- a/crates/pulsing-actor/src/watch.rs +++ b/crates/pulsing-actor/src/watch.rs @@ -20,8 +20,8 @@ use tokio::sync::{mpsc, RwLock}; /// - Cluster broadcast /// - Routing table cleanup pub struct ActorLifecycle { - /// Watch registry: target_actor_name -> set of watcher_actor_names - watchers: RwLock>>, + /// Watch registry: target_actor_id -> set of watcher_actor_ids + watchers: RwLock>>, } impl ActorLifecycle { @@ -35,33 +35,30 @@ impl ActorLifecycle { // ==================== Watch API ==================== /// Register a watch: watcher will be notified when target stops - pub async fn watch(&self, watcher_name: &str, target_name: &str) { + pub async fn watch(&self, watcher: &ActorId, target: &ActorId) { let mut watchers = self.watchers.write().await; - watchers - .entry(target_name.to_string()) - .or_default() - .insert(watcher_name.to_string()); + watchers.entry(*target).or_default().insert(*watcher); tracing::debug!( - watcher = watcher_name, - target = target_name, + watcher = %watcher, + target = %target, "Watch registered" ); } /// Remove a watch relationship - pub async fn unwatch(&self, watcher_name: &str, target_name: &str) { + pub async fn unwatch(&self, watcher: &ActorId, target: &ActorId) { let mut watchers = self.watchers.write().await; - if let Some(watcher_set) = watchers.get_mut(target_name) { - watcher_set.remove(watcher_name); + if let Some(watcher_set) = watchers.get_mut(target) { + watcher_set.remove(watcher); if watcher_set.is_empty() { - watchers.remove(target_name); + watchers.remove(target); } } tracing::debug!( - watcher = watcher_name, - target = target_name, + watcher = %watcher, + target = %target, "Watch removed" ); } @@ -79,7 +76,6 @@ impl ActorLifecycle { /// /// # Arguments /// * `actor_id` - The terminated actor's ID - /// * `actor_name` - The actor's local name /// * `named_path` - Optional named actor path /// * `reason` - Why the actor stopped /// * `named_actor_paths` - Routing table to clean up @@ -89,14 +85,13 @@ impl ActorLifecycle { pub async fn handle_termination( &self, actor_id: &ActorId, - actor_name: &str, named_path: Option, reason: StopReason, named_actor_paths: &DashMap, cluster: &RwLock>>, get_sender: F, ) where - F: Fn(&str) -> Option>, + F: Fn(&ActorId) -> Option>, { // 1. Log termination self.log_termination(actor_id, named_path.as_ref(), &reason); @@ -112,8 +107,7 @@ impl ActorLifecycle { .await; // 3. Notify all watchers - self.notify_watchers(actor_id, actor_name, reason, get_sender) - .await; + self.notify_watchers(actor_id, reason, get_sender).await; } /// Log actor termination event @@ -169,29 +163,24 @@ impl ActorLifecycle { } /// Notify all watchers that an actor has terminated - async fn notify_watchers( - &self, - actor_id: &ActorId, - actor_name: &str, - reason: StopReason, - get_sender: F, - ) where - F: Fn(&str) -> Option>, + async fn notify_watchers(&self, actor_id: &ActorId, reason: StopReason, get_sender: F) + where + F: Fn(&ActorId) -> Option>, { // Get and remove watchers for this actor - let watcher_names = { + let watcher_ids = { let mut watchers = self.watchers.write().await; - watchers.remove(actor_name).unwrap_or_default() + watchers.remove(actor_id).unwrap_or_default() }; - if watcher_names.is_empty() { + if watcher_ids.is_empty() { return; } tracing::info!( actor_id = %actor_id, reason = %reason, - watcher_count = watcher_names.len(), + watcher_count = watcher_ids.len(), "Notifying watchers of actor termination" ); @@ -215,12 +204,12 @@ impl ActorLifecycle { }; // Send to all watchers - for watcher_name in watcher_names { - if let Some(sender) = get_sender(&watcher_name) { + for watcher_id in watcher_ids { + if let Some(sender) = get_sender(&watcher_id) { let envelope = Envelope::tell(Message::single(&msg_type, payload_bytes.clone())); if let Err(e) = sender.try_send(envelope) { tracing::warn!( - watcher = watcher_name, + watcher = %watcher_id, error = %e, "Failed to send termination message to watcher" ); @@ -235,18 +224,18 @@ impl ActorLifecycle { /// /// Call this when an actor is being removed from the system. /// It removes the actor both as a target and as a watcher. - pub async fn remove_actor(&self, actor_name: &str) { + pub async fn remove_actor(&self, actor_id: &ActorId) { let mut watchers = self.watchers.write().await; // Remove as target - watchers.remove(actor_name); + watchers.remove(actor_id); // Remove as watcher from all targets, and clean up empty entries let mut empty_targets = Vec::new(); for (target, watcher_set) in watchers.iter_mut() { - watcher_set.remove(actor_name); + watcher_set.remove(actor_id); if watcher_set.is_empty() { - empty_targets.push(target.clone()); + empty_targets.push(*target); } } @@ -271,18 +260,18 @@ impl ActorLifecycle { } /// Get watchers for a specific actor - pub async fn get_watchers(&self, target_name: &str) -> HashSet { + pub async fn get_watchers(&self, target: &ActorId) -> HashSet { self.watchers .read() .await - .get(target_name) + .get(target) .cloned() .unwrap_or_default() } /// Check if an actor is being watched - pub async fn is_watched(&self, target_name: &str) -> bool { - self.watchers.read().await.contains_key(target_name) + pub async fn is_watched(&self, target: &ActorId) -> bool { + self.watchers.read().await.contains_key(target) } } @@ -301,48 +290,62 @@ mod tests { async fn test_watch_unwatch() { let lifecycle = ActorLifecycle::new(); + let watcher1 = ActorId::new(NodeId::generate(), 1); + let watcher2 = ActorId::new(NodeId::generate(), 2); + let target1 = ActorId::new(NodeId::generate(), 10); + // Add watches - lifecycle.watch("watcher1", "target1").await; - lifecycle.watch("watcher2", "target1").await; + lifecycle.watch(&watcher1, &target1).await; + lifecycle.watch(&watcher2, &target1).await; - assert!(lifecycle.is_watched("target1").await); - assert_eq!(lifecycle.get_watchers("target1").await.len(), 2); + assert!(lifecycle.is_watched(&target1).await); + assert_eq!(lifecycle.get_watchers(&target1).await.len(), 2); // Unwatch - lifecycle.unwatch("watcher1", "target1").await; - assert_eq!(lifecycle.get_watchers("target1").await.len(), 1); + lifecycle.unwatch(&watcher1, &target1).await; + assert_eq!(lifecycle.get_watchers(&target1).await.len(), 1); - lifecycle.unwatch("watcher2", "target1").await; - assert!(!lifecycle.is_watched("target1").await); + lifecycle.unwatch(&watcher2, &target1).await; + assert!(!lifecycle.is_watched(&target1).await); } #[tokio::test] async fn test_remove_actor() { let lifecycle = ActorLifecycle::new(); + let watcher1 = ActorId::new(NodeId::generate(), 1); + let watcher2 = ActorId::new(NodeId::generate(), 2); + let target1 = ActorId::new(NodeId::generate(), 10); + let target2 = ActorId::new(NodeId::generate(), 11); + // Setup: watcher1 watches target1 and target2 - lifecycle.watch("watcher1", "target1").await; - lifecycle.watch("watcher1", "target2").await; - lifecycle.watch("watcher2", "target1").await; + lifecycle.watch(&watcher1, &target1).await; + lifecycle.watch(&watcher1, &target2).await; + lifecycle.watch(&watcher2, &target1).await; // Remove watcher1 from all relationships - lifecycle.remove_actor("watcher1").await; + lifecycle.remove_actor(&watcher1).await; // watcher1 should be removed as watcher - let watchers = lifecycle.get_watchers("target1").await; - assert!(!watchers.contains("watcher1")); - assert!(watchers.contains("watcher2")); + let watchers = lifecycle.get_watchers(&target1).await; + assert!(!watchers.contains(&watcher1)); + assert!(watchers.contains(&watcher2)); // target2 should have no watchers - assert!(!lifecycle.is_watched("target2").await); + assert!(!lifecycle.is_watched(&target2).await); } #[tokio::test] async fn test_clear() { let lifecycle = ActorLifecycle::new(); - lifecycle.watch("w1", "t1").await; - lifecycle.watch("w2", "t2").await; + let w1 = ActorId::new(NodeId::generate(), 1); + let w2 = ActorId::new(NodeId::generate(), 2); + let t1 = ActorId::new(NodeId::generate(), 10); + let t2 = ActorId::new(NodeId::generate(), 11); + + lifecycle.watch(&w1, &t1).await; + lifecycle.watch(&w2, &t2).await; assert_eq!(lifecycle.watched_count().await, 2); @@ -355,19 +358,19 @@ mod tests { async fn test_notify_watchers() { let lifecycle = ActorLifecycle::new(); - lifecycle.watch("watcher1", "target1").await; - lifecycle.watch("watcher2", "target1").await; + let watcher1 = ActorId::new(NodeId::generate(), 1); + let watcher2 = ActorId::new(NodeId::generate(), 2); + let target1 = ActorId::new(NodeId::generate(), 10); - let actor_id = ActorId::new(NodeId::generate(), 1); + lifecycle.watch(&watcher1, &target1).await; + lifecycle.watch(&watcher2, &target1).await; // Create a channel to receive notifications let (tx, mut rx) = mpsc::channel::(10); // Notify watchers lifecycle - .notify_watchers(&actor_id, "target1", StopReason::Normal, |_name| { - Some(tx.clone()) - }) + .notify_watchers(&target1, StopReason::Normal, |_id| Some(tx.clone())) .await; // Should receive 2 notifications @@ -378,6 +381,6 @@ mod tests { assert_eq!(count, 2); // Watchers should be cleared after notification - assert!(!lifecycle.is_watched("target1").await); + assert!(!lifecycle.is_watched(&target1).await); } } From bb307f0562f1665fd55d20f01da5b56507538c0f Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 21:17:30 +0800 Subject: [PATCH 4/7] Enhance Actor ID handling and update dependencies - Introduced UUID-based ActorId and NodeId, replacing previous numeric identifiers for improved uniqueness and traceability. - Updated ActorAddress to support new UUID format, ensuring backward compatibility with legacy formats. - Refactored tests and system components to utilize the new ActorId structure, enhancing clarity and consistency across the codebase. - Added `uuid` dependency to the project for UUID generation and handling. - Modified Justfile to include necessary build dependencies for Python and C++ integration. --- Cargo.lock | 1 + Justfile | 25 ++- crates/pulsing-actor/src/actor/address.rs | 173 ++++++++++-------- crates/pulsing-actor/src/actor/context.rs | 36 ++-- crates/pulsing-actor/src/actor/reference.rs | 8 +- crates/pulsing-actor/src/actor/traits.rs | 49 ++--- crates/pulsing-actor/src/metrics/mod.rs | 2 +- crates/pulsing-actor/src/system/handler.rs | 27 +-- crates/pulsing-actor/src/system/lifecycle.rs | 5 +- crates/pulsing-actor/src/system/mod.rs | 21 +-- crates/pulsing-actor/src/system/resolve.rs | 40 ++-- crates/pulsing-actor/src/system/spawn.rs | 13 +- .../src/system_actor/messages.rs | 36 +++- crates/pulsing-actor/src/system_actor/mod.rs | 4 +- .../pulsing-actor/src/transport/http2/mod.rs | 8 +- crates/pulsing-actor/src/watch.rs | 29 ++- crates/pulsing-actor/tests/address_tests.rs | 14 +- .../tests/cluster/member_tests.rs | 16 +- crates/pulsing-actor/tests/cluster_tests.rs | 33 ++-- .../tests/http2_transport_tests.rs | 6 +- .../pulsing-actor/tests/integration_tests.rs | 23 +-- .../pulsing-actor/tests/multi_node_tests.rs | 24 +-- .../pulsing-actor/tests/system_actor_tests.rs | 6 +- crates/pulsing-py/Cargo.toml | 1 + crates/pulsing-py/src/actor.rs | 155 ++++++++++------ python/pulsing/actor/remote.py | 21 ++- python/pulsing/actors/load_stream.py | 5 +- python/pulsing/actors/scheduler.py | 7 +- python/pulsing/queue/manager.py | 63 ++++--- python/pulsing/topic/broker.py | 4 +- python/pulsing/topic/topic.py | 3 +- tests/python/test_system_actor.py | 12 +- 32 files changed, 502 insertions(+), 368 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a4f21112..335372603 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1875,6 +1875,7 @@ dependencies = [ "tokio", "tracing", "tracing-subscriber", + "uuid", ] [[package]] diff --git a/Justfile b/Justfile index 16f6f6981..af545338a 100644 --- a/Justfile +++ b/Justfile @@ -163,7 +163,10 @@ ci-setup-macos: ensure-rust ensure-uv ci-setup-fedora python_version="3.12": ensure-uv #!/usr/bin/env bash export PATH="$HOME/.local/bin:$PATH" - dnf install -y python{{python_version}} + # Install build dependencies + dnf install -y gcc gcc-c++ openssl-devel + # Use uv to install Python (consistent with manylinux setup) + uv python install {{python_version}} uv tool install pytest echo "==> Setup complete!" @@ -193,8 +196,24 @@ ci-build manylinux="": ci-test: #!/usr/bin/env bash export PATH="$HOME/.local/bin:$PATH" - pip install dist/*.whl pytest pytest-asyncio 2>/dev/null || uv pip install --system dist/*.whl pytest pytest-asyncio - python3 -m pytest tests/python -v + # Install wheel and dependencies using uv (preferred) or pip + if command -v uv &> /dev/null; then + uv pip install --system dist/*.whl pytest pytest-asyncio + # Use uv run pytest (uses uv-managed Python environment) + uv run pytest tests/python -v + else + # Fallback to pip if uv not available + pip install dist/*.whl pytest pytest-asyncio + # Try to find python executable + for py in python3 python3.12 python3.11 python3.10 python; do + if command -v $py &> /dev/null; then + $py -m pytest tests/python -v + exit 0 + fi + done + echo "Error: No Python interpreter found" + exit 1 + fi # ============================================================================= # 本地模拟 CI 流水线 (Action 命令) diff --git a/crates/pulsing-actor/src/actor/address.rs b/crates/pulsing-actor/src/actor/address.rs index 75f249c84..30890f4da 100644 --- a/crates/pulsing-actor/src/actor/address.rs +++ b/crates/pulsing-actor/src/actor/address.rs @@ -1,6 +1,6 @@ //! Actor addressing (URI-based). -use super::traits::NodeId; +use super::traits::{ActorId, NodeId}; use serde::{Deserialize, Serialize}; use std::fmt; use std::hash::Hash; @@ -286,12 +286,10 @@ pub enum ActorAddress { }, /// Global Actor Address - direct addressing without Gossip registration - /// Format: `actor://node_id/actor_id` + /// Format: `actor://actor_id` (node_id is no longer needed with UUID-based IDs) Global { - /// The node where the actor resides (0 = local) - node_id: NodeId, - /// The actor's local identifier - actor_id: u64, + /// The actor's unique identifier (UUID) + actor_id: ActorId, }, } @@ -329,9 +327,13 @@ impl ActorAddress { if let Some((path, node)) = path_part.rsplit_once('@') { // With instance specifier - let node_id = node - .parse::() - .map_err(|_| AddressParseError::InvalidFormat)?; + // Parse node_id as u128 (UUID format or numeric) + let node_id = if let Ok(uuid) = uuid::Uuid::parse_str(node) { + uuid.as_u128() + } else { + node.parse::() + .map_err(|_| AddressParseError::InvalidFormat)? + }; Ok(Self::Named { path: ActorPath::new(path)?, instance: Some(NodeId::new(node_id)), @@ -344,26 +346,35 @@ impl ActorAddress { }) } } else { - // Global: actor://node_id/actor_id - let (node_id_str, actor_id_str) = rest - .split_once('/') - .ok_or(AddressParseError::InvalidFormat)?; - - if node_id_str.is_empty() || actor_id_str.is_empty() { - return Err(AddressParseError::InvalidFormat); + // Global: actor://actor_id (UUID format) + // Support both UUID string format and legacy node_id/actor_id format for backward compatibility + if let Some((node_id_str, actor_id_str)) = rest.split_once('/') { + // Legacy format: actor://node_id/actor_id + // Try to parse as UUID first, fall back to legacy format + if let Ok(uuid) = uuid::Uuid::parse_str(actor_id_str) { + Ok(Self::Global { + actor_id: ActorId::new(uuid.as_u128()), + }) + } else if let (Ok(_node_id), Ok(_actor_id)) = + (node_id_str.parse::(), actor_id_str.parse::()) + { + // Legacy format - convert to UUID (not recommended, but supported) + // This is a compatibility shim + let uuid = uuid::Uuid::new_v4(); + Ok(Self::Global { + actor_id: ActorId::new(uuid.as_u128()), + }) + } else { + Err(AddressParseError::InvalidFormat) + } + } else { + // New format: actor://actor_id (direct UUID) + let uuid = + uuid::Uuid::parse_str(rest).map_err(|_| AddressParseError::InvalidFormat)?; + Ok(Self::Global { + actor_id: ActorId::new(uuid.as_u128()), + }) } - - let node_id = node_id_str - .parse::() - .map_err(|_| AddressParseError::InvalidFormat)?; - let actor_id = actor_id_str - .parse::() - .map_err(|_| AddressParseError::InvalidFormat)?; - - Ok(Self::Global { - node_id: NodeId::new(node_id), - actor_id, - }) } } @@ -384,16 +395,13 @@ impl ActorAddress { } /// Create a global actor address - pub fn global(node_id: NodeId, actor_id: u64) -> Self { - Self::Global { node_id, actor_id } + pub fn global(actor_id: ActorId) -> Self { + Self::Global { actor_id } } - /// Create a local actor reference (node_id = 0) - pub fn local(actor_id: u64) -> Self { - Self::Global { - node_id: NodeId::LOCAL, - actor_id, - } + /// Create a local actor reference (alias for global) + pub fn local(actor_id: ActorId) -> Self { + Self::Global { actor_id } } /// Convert to URI string @@ -411,15 +419,17 @@ impl ActorAddress { } => { format!("actor:///{}@{}", path.as_str(), node.0) } - Self::Global { node_id, actor_id } => { - format!("actor://{}/{}", node_id.0, actor_id) + Self::Global { actor_id } => { + format!("actor://{}", actor_id) } } } - /// Check if this is a local reference (node_id = 0) + /// Check if this is a local reference + /// Note: With UUID-based IDs, we can't determine locality from the address alone + /// This method is kept for compatibility but always returns false for Global addresses pub fn is_local(&self) -> bool { - matches!(self, Self::Global { node_id, .. } if node_id.is_local()) + matches!(self, Self::Named { .. }) } /// Check if this is a named actor address @@ -433,14 +443,9 @@ impl ActorAddress { } /// Resolve local node id to actual node ID - pub fn resolve_local(self, current_node: NodeId) -> Self { - match self { - Self::Global { node_id, actor_id } if node_id.is_local() => Self::Global { - node_id: current_node, - actor_id, - }, - other => other, - } + /// Note: With UUID-based IDs, this is a no-op for Global addresses + pub fn resolve_local(self, _current_node: NodeId) -> Self { + self } /// Add instance specifier to a named address @@ -462,18 +467,18 @@ impl ActorAddress { } } - /// Get the node ID + /// Get the node ID for named addresses (instance specifier) pub fn node_id(&self) -> Option { match self { - Self::Global { node_id, .. } => Some(*node_id), Self::Named { instance, .. } => *instance, + Self::Global { .. } => None, // Global addresses don't have node_id anymore } } /// Get the actor ID for global addresses - pub fn actor_id(&self) -> Option { + pub fn actor_id(&self) -> Option { match self { - Self::Global { actor_id, .. } => Some(*actor_id), + Self::Global { actor_id } => Some(*actor_id), _ => None, } } @@ -590,25 +595,28 @@ mod tests { #[test] fn test_address_parse_global() { - let addr = ActorAddress::parse("actor://123/456").unwrap(); + // Parse a UUID-based global address + let uuid = uuid::Uuid::new_v4(); + let addr_str = format!("actor://{}", uuid.simple()); + let addr = ActorAddress::parse(&addr_str).unwrap(); match addr { - ActorAddress::Global { node_id, actor_id } => { - assert_eq!(node_id.0, 123); - assert_eq!(actor_id, 456); + ActorAddress::Global { actor_id } => { + assert_eq!(actor_id.0, uuid.as_u128()); } _ => panic!("Expected Global address"), } } #[test] - fn test_address_parse_local() { - let addr = ActorAddress::parse("actor://0/456").unwrap(); - assert!(addr.is_local()); + fn test_address_parse_with_uuid() { + // Create an ActorId and parse its address + let id = ActorId::generate(); + let addr_str = format!("actor://{}", id); + let addr = ActorAddress::parse(&addr_str).unwrap(); match addr { - ActorAddress::Global { node_id, actor_id } => { - assert_eq!(node_id.0, 0); - assert_eq!(actor_id, 456); + ActorAddress::Global { actor_id } => { + assert_eq!(actor_id, id); } _ => panic!("Expected Global address"), } @@ -616,14 +624,17 @@ mod tests { #[test] fn test_address_resolve_local() { - let addr = ActorAddress::parse("actor://0/456").unwrap(); - let current_node = NodeId::new(123); + // With UUID-based IDs, resolve_local is a no-op for Global addresses + let actor_id = ActorId::generate(); + let addr = ActorAddress::global(actor_id); + let current_node = NodeId::generate(); let resolved = addr.resolve_local(current_node); match resolved { - ActorAddress::Global { node_id, actor_id } => { - assert_eq!(node_id.0, 123); - assert_eq!(actor_id, 456); + ActorAddress::Global { + actor_id: resolved_id, + } => { + assert_eq!(resolved_id, actor_id); } _ => panic!("Expected Global address"), } @@ -638,15 +649,16 @@ mod tests { // Named instance let addr = ActorAddress::named_instance(ActorPath::new("services/api").unwrap(), NodeId::new(123)); - assert_eq!(addr.to_uri(), "actor:///services/api@123"); + assert!(addr.to_uri().contains("@")); // Contains instance specifier - // Global - let addr = ActorAddress::global(NodeId::new(123), 456); - assert_eq!(addr.to_uri(), "actor://123/456"); + // Global - UUID format + let actor_id = ActorId::new(0x12345678_9abcdef0_12345678_9abcdef0); + let addr = ActorAddress::global(actor_id); + assert!(addr.to_uri().starts_with("actor://")); - // Local - let addr = ActorAddress::local(456); - assert_eq!(addr.to_uri(), "actor://0/456"); + // Local alias - same as global with UUID + let addr = ActorAddress::local(actor_id); + assert!(addr.to_uri().starts_with("actor://")); } #[test] @@ -657,16 +669,21 @@ mod tests { #[test] fn test_address_roundtrip() { - let cases = vec![ + // Named addresses roundtrip correctly + let named_cases = vec![ "actor:///services/llm/router", "actor:///services/llm/router@123", - "actor://123/456", - "actor://0/789", ]; - for uri in cases { + for uri in named_cases { let addr = ActorAddress::parse(uri).unwrap(); assert_eq!(addr.to_uri(), uri); } + + // Global addresses with UUID format + let actor_id = ActorId::generate(); + let uri = format!("actor://{}", actor_id); + let addr = ActorAddress::parse(&uri).unwrap(); + assert_eq!(addr.to_uri(), uri); } } diff --git a/crates/pulsing-actor/src/actor/context.rs b/crates/pulsing-actor/src/actor/context.rs index cdd75188c..dfdb2afd2 100644 --- a/crates/pulsing-actor/src/actor/context.rs +++ b/crates/pulsing-actor/src/actor/context.rs @@ -158,14 +158,15 @@ mod tests { #[tokio::test] async fn test_context_creation() { - let (ctx, _system) = create_test_context(ActorId::local(1)).await; - assert_eq!(ctx.id().local_id(), 1); + let (ctx, _system) = create_test_context(ActorId::generate()).await; + // UUID-based IDs are non-zero + assert_ne!(ctx.id().0, 0); assert!(!ctx.is_cancelled()); } #[tokio::test] async fn test_context_cancellation() { - let (ctx, _system) = create_test_context(ActorId::local(1)).await; + let (ctx, _system) = create_test_context(ActorId::generate()).await; assert!(!ctx.is_cancelled()); ctx.cancel_token().cancel(); assert!(ctx.is_cancelled()); @@ -173,24 +174,25 @@ mod tests { #[tokio::test] async fn test_context_node_id() { - let (ctx, system) = create_test_context(ActorId::local(1)).await; + let (ctx, system) = create_test_context(ActorId::generate()).await; assert_eq!(ctx.node_id(), *system.node_id()); } #[tokio::test] async fn test_context_multiple_actors() { - let (ctx1, _system1) = create_test_context(ActorId::local(1)).await; - let (ctx2, _system2) = create_test_context(ActorId::local(2)).await; - let (ctx3, _system3) = create_test_context(ActorId::local(3)).await; + let (ctx1, _system1) = create_test_context(ActorId::generate()).await; + let (ctx2, _system2) = create_test_context(ActorId::generate()).await; + let (ctx3, _system3) = create_test_context(ActorId::generate()).await; - assert_eq!(ctx1.id().local_id(), 1); - assert_eq!(ctx2.id().local_id(), 2); - assert_eq!(ctx3.id().local_id(), 3); + // UUID-based IDs should all be unique + assert_ne!(ctx1.id(), ctx2.id()); + assert_ne!(ctx2.id(), ctx3.id()); + assert_ne!(ctx1.id(), ctx3.id()); } #[tokio::test] async fn test_context_cancel_token_clone() { - let (ctx, _system) = create_test_context(ActorId::local(1)).await; + let (ctx, _system) = create_test_context(ActorId::generate()).await; let token = ctx.cancel_token().clone(); assert!(!ctx.is_cancelled()); @@ -204,8 +206,8 @@ mod tests { #[tokio::test] async fn test_context_actor_ref() { - let (mut ctx, _system) = create_test_context(ActorId::local(1)).await; - let target_id = ActorId::local(2); + let (mut ctx, _system) = create_test_context(ActorId::generate()).await; + let target_id = ActorId::generate(); // actor_ref should fail for non-existent actor let result = ctx.actor_ref(&target_id).await; @@ -214,8 +216,8 @@ mod tests { #[tokio::test] async fn test_context_watch() { - let (ctx, _system) = create_test_context(ActorId::local(1)).await; - let target_id = ActorId::local(2); + let (ctx, _system) = create_test_context(ActorId::generate()).await; + let target_id = ActorId::generate(); // watch should work with real system let result = ctx.watch(&target_id).await; @@ -225,8 +227,8 @@ mod tests { #[tokio::test] async fn test_context_unwatch() { - let (ctx, _system) = create_test_context(ActorId::local(1)).await; - let target_id = ActorId::local(2); + let (ctx, _system) = create_test_context(ActorId::generate()).await; + let target_id = ActorId::generate(); // unwatch should work with real system let result = ctx.unwatch(&target_id).await; diff --git a/crates/pulsing-actor/src/actor/reference.rs b/crates/pulsing-actor/src/actor/reference.rs index eed295d17..5f2a32729 100644 --- a/crates/pulsing-actor/src/actor/reference.rs +++ b/crates/pulsing-actor/src/actor/reference.rs @@ -169,8 +169,8 @@ impl ActorRef { /// The reference will automatically re-resolve after CACHE_TTL (5 seconds). pub fn lazy(path: ActorPath, resolver: Arc) -> Self { Self { - // Use a placeholder ID for lazy refs - actor_id: ActorId::local(0), + // Use a placeholder ID for lazy refs (all zeros) + actor_id: ActorId::new(0), inner: ActorRefInner::Lazy(Arc::new(LazyActorRef::new(path, resolver))), } } @@ -294,7 +294,7 @@ mod tests { #[tokio::test] async fn test_local_actor_ref_tell() { let (tx, mut rx) = mpsc::channel(16); - let actor_id = ActorId::local(1); + let actor_id = ActorId::generate(); let actor_ref = ActorRef::local(actor_id, tx); actor_ref.tell(TestMsg { value: 42 }).await.unwrap(); @@ -307,7 +307,7 @@ mod tests { #[tokio::test] async fn test_local_actor_ref_send_oneway() { let (tx, mut rx) = mpsc::channel(16); - let actor_id = ActorId::local(1); + let actor_id = ActorId::generate(); let actor_ref = ActorRef::local(actor_id, tx); let msg = Message::single("TestMsg", b"hello"); diff --git a/crates/pulsing-actor/src/actor/traits.rs b/crates/pulsing-actor/src/actor/traits.rs index 6814163f1..9109ad9e1 100644 --- a/crates/pulsing-actor/src/actor/traits.rs +++ b/crates/pulsing-actor/src/actor/traits.rs @@ -13,18 +13,16 @@ use tokio::sync::mpsc; /// Node identifier in the cluster (0 = local). #[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, Serialize, Deserialize, Default)] -pub struct NodeId(pub u64); +pub struct NodeId(pub u128); impl NodeId { pub const LOCAL: NodeId = NodeId(0); pub fn generate() -> Self { - let uuid = uuid::Uuid::new_v4(); - let id = uuid.as_u128() as u64; - Self(if id == 0 { 1 } else { id }) + Self(uuid::Uuid::new_v4().as_u128()) } - pub fn new(id: u64) -> Self { + pub fn new(id: u128) -> Self { Self(id) } @@ -36,7 +34,13 @@ impl NodeId { impl fmt::Display for NodeId { #[cfg_attr(coverage_nightly, coverage(off))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) + if self.is_local() { + write!(f, "0") + } else { + // Format as UUID string for better readability + let uuid = uuid::Uuid::from_u128(self.0); + write!(f, "{}", uuid.simple()) + } } } @@ -45,27 +49,23 @@ impl fmt::Display for NodeId { pub struct ActorId(pub u128); impl ActorId { - pub fn new(node: NodeId, local_id: u64) -> Self { - Self(((node.0 as u128) << 64) | (local_id as u128)) - } - - pub fn local(local_id: u64) -> Self { - Self::new(NodeId::LOCAL, local_id) - } - - pub fn node(&self) -> NodeId { - NodeId((self.0 >> 64) as u64) + /// Generate a new unique ActorId using UUID v4 + pub fn generate() -> Self { + Self(uuid::Uuid::new_v4().as_u128()) } - pub fn local_id(&self) -> u64 { - self.0 as u64 + /// Create an ActorId from a u128 value + pub fn new(id: u128) -> Self { + Self(id) } } impl fmt::Display for ActorId { #[cfg_attr(coverage_nightly, coverage(off))] fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}", self.node().0, self.local_id()) + // Format as UUID string for better readability + let uuid = uuid::Uuid::from_u128(self.0); + write!(f, "{}", uuid.simple()) } } @@ -354,10 +354,13 @@ mod tests { #[test] fn test_actor_id() { - let node = NodeId::generate(); - let id = ActorId::new(node, 123); - assert_eq!(id.local_id(), 123); - assert_eq!(id.node(), node); + let id = ActorId::generate(); + // UUID-based IDs are unique and non-zero + assert_ne!(id.0, 0); + + // Test creating from specific value + let id2 = ActorId::new(12345); + assert_eq!(id2.0, 12345); } #[test] diff --git a/crates/pulsing-actor/src/metrics/mod.rs b/crates/pulsing-actor/src/metrics/mod.rs index d93b12094..dbd021cba 100644 --- a/crates/pulsing-actor/src/metrics/mod.rs +++ b/crates/pulsing-actor/src/metrics/mod.rs @@ -321,7 +321,7 @@ impl Default for MetricsRegistry { /// System-level metrics collected from SystemActor #[derive(Debug, Clone, Default)] pub struct SystemMetrics { - pub node_id: u64, + pub node_id: u128, pub actors_count: usize, pub messages_total: u64, pub actors_created: u64, diff --git a/crates/pulsing-actor/src/system/handler.rs b/crates/pulsing-actor/src/system/handler.rs index b8f13cbbf..a1416008b 100644 --- a/crates/pulsing-actor/src/system/handler.rs +++ b/crates/pulsing-actor/src/system/handler.rs @@ -17,10 +17,10 @@ use tokio::sync::{mpsc, RwLock}; /// Unified message handler for HTTP/2 transport pub(crate) struct SystemMessageHandler { node_id: NodeId, - /// Local actors indexed by local_id - local_actors: Arc>, - /// Actor name to local_id mapping - actor_names: Arc>, + /// Local actors indexed by ActorId + local_actors: Arc>, + /// Actor name to ActorId mapping + actor_names: Arc>, named_actor_paths: Arc>, cluster: Arc>>>, } @@ -28,8 +28,8 @@ pub(crate) struct SystemMessageHandler { impl SystemMessageHandler { pub fn new( node_id: NodeId, - local_actors: Arc>, - actor_names: Arc>, + local_actors: Arc>, + actor_names: Arc>, named_actor_paths: Arc>, cluster: Arc>>>, ) -> Self { @@ -42,18 +42,19 @@ impl SystemMessageHandler { } } - /// Find actor sender by name or local_id (O(1) lookup) + /// Find actor sender by name or ActorId (O(1) lookup) fn find_actor_sender(&self, actor_name: &str) -> anyhow::Result> { - // First try by name -> local_id -> handle - if let Some(local_id) = self.actor_names.get(actor_name) { - if let Some(handle) = self.local_actors.get(local_id.value()) { + // First try by name -> ActorId -> handle + if let Some(actor_id) = self.actor_names.get(actor_name) { + if let Some(handle) = self.local_actors.get(actor_id.value()) { return Ok(handle.sender.clone()); } } - // Then try parsing as local_id directly (O(1)) - if let Ok(local_id) = actor_name.parse::() { - if let Some(handle) = self.local_actors.get(&local_id) { + // Then try parsing as ActorId (UUID format) + if let Ok(uuid) = uuid::Uuid::parse_str(actor_name) { + let actor_id = ActorId::new(uuid.as_u128()); + if let Some(handle) = self.local_actors.get(&actor_id) { return Ok(handle.sender.clone()); } } diff --git a/crates/pulsing-actor/src/system/lifecycle.rs b/crates/pulsing-actor/src/system/lifecycle.rs index 96f5649b7..628e9a8c7 100644 --- a/crates/pulsing-actor/src/system/lifecycle.rs +++ b/crates/pulsing-actor/src/system/lifecycle.rs @@ -210,9 +210,8 @@ impl ActorSystem { &self.named_actor_paths, &self.cluster, |actor_id| { - // Directly get local_id from ActorId and lookup in local_actors - let local_id = actor_id.local_id(); - local_actors.get(&local_id).map(|h| h.sender.clone()) + // Directly lookup by ActorId + local_actors.get(actor_id).map(|h| h.sender.clone()) }, ) .await; diff --git a/crates/pulsing-actor/src/system/mod.rs b/crates/pulsing-actor/src/system/mod.rs index eb2196423..3e2a8cd4f 100644 --- a/crates/pulsing-actor/src/system/mod.rs +++ b/crates/pulsing-actor/src/system/mod.rs @@ -33,7 +33,6 @@ use dashmap::DashMap; use handle::LocalActorHandle; use handler::SystemMessageHandler; use std::net::SocketAddr; -use std::sync::atomic::AtomicU64; use std::sync::Arc; use tokio::sync::mpsc; use tokio::sync::RwLock; @@ -50,11 +49,11 @@ pub struct ActorSystem { /// Default mailbox capacity for actors pub(crate) default_mailbox_capacity: usize, - /// Local actors indexed by local_id (O(1) lookup by ActorId) - pub(crate) local_actors: Arc>, + /// Local actors indexed by ActorId (O(1) lookup by ActorId) + pub(crate) local_actors: Arc>, - /// Actor name to local_id mapping (for name-based lookups) - pub(crate) actor_names: Arc>, + /// Actor name to ActorId mapping (for name-based lookups) + pub(crate) actor_names: Arc>, /// Named actor path to local actor name mapping (path_string -> actor_name) pub(crate) named_actor_paths: Arc>, @@ -71,9 +70,6 @@ pub struct ActorSystem { /// Actor lifecycle manager (watch, termination handling) pub(crate) lifecycle: Arc, - /// Actor ID counter (for generating unique local IDs) - pub(crate) actor_id_counter: AtomicU64, - /// Default load balancing policy pub(crate) default_lb_policy: Arc, @@ -97,8 +93,8 @@ impl ActorSystem { pub async fn new(config: SystemConfig) -> anyhow::Result> { let cancel_token = CancellationToken::new(); let node_id = NodeId::generate(); - let local_actors: Arc> = Arc::new(DashMap::new()); - let actor_names: Arc> = Arc::new(DashMap::new()); + let local_actors: Arc> = Arc::new(DashMap::new()); + let actor_names: Arc> = Arc::new(DashMap::new()); let named_actor_paths: Arc> = Arc::new(DashMap::new()); let cluster_holder: Arc>>> = Arc::new(RwLock::new(None)); @@ -176,7 +172,6 @@ impl ActorSystem { transport, cancel_token, lifecycle, - actor_id_counter: AtomicU64::new(1), // Start from 1 (0 reserved for system) default_lb_policy: Arc::new(RoundRobinPolicy::new()), node_load: Arc::new(DashMap::new()), }); @@ -310,8 +305,8 @@ impl ActorSystemRef for ActorSystem { } async fn watch(&self, watcher: &ActorId, target: &ActorId) -> anyhow::Result<()> { - // Only support local watching for now - if target.node() != self.node_id { + // Check if target is a local actor + if !self.local_actors.contains_key(target) { return Err(anyhow::anyhow!( "Cannot watch remote actor: {} (watching remote actors not yet supported)", target diff --git a/crates/pulsing-actor/src/system/resolve.rs b/crates/pulsing-actor/src/system/resolve.rs index 28cd46a5b..8df676e60 100644 --- a/crates/pulsing-actor/src/system/resolve.rs +++ b/crates/pulsing-actor/src/system/resolve.rs @@ -29,30 +29,26 @@ impl ActorSystem { /// Get ActorRef for a local or remote actor by ID /// - /// This is an O(1) operation for local actors using local_id indexing. + /// This is an O(1) operation for local actors using ActorId indexing. pub async fn actor_ref(&self, id: &ActorId) -> anyhow::Result { - // Check if local - if id.node() == self.node_id || id.node().is_local() { - // O(1) lookup by local_id - let handle = self - .local_actors - .get(&id.local_id()) - .ok_or_else(|| anyhow::anyhow!("Local actor not found: {}", id))?; + // Try local lookup first (O(1)) + if let Some(handle) = self.local_actors.get(id) { return Ok(ActorRef::local(handle.actor_id, handle.sender.clone())); } - // Remote actor - get address from cluster + // Not found locally - try remote lookup via cluster + // Note: With UUID-based IDs, we need to check cluster for actor location let cluster = self.cluster_or_err().await?; - let member = cluster - .get_member(&id.node()) - .await - .ok_or_else(|| anyhow::anyhow!("Node not found in cluster: {}", id.node()))?; - - // Create remote transport using actor id - let transport = Http2RemoteTransport::new_by_id(self.transport.client(), member.addr, *id); + // Lookup actor location in cluster + if let Some(member_info) = cluster.lookup_actor(id).await { + // Create remote transport using actor id + let transport = + Http2RemoteTransport::new_by_id(self.transport.client(), member_info.addr, *id); + return Ok(ActorRef::remote(*id, member_info.addr, Arc::new(transport))); + } - Ok(ActorRef::remote(*id, member.addr, Arc::new(transport))) + Err(anyhow::anyhow!("Actor not found: {}", id)) } /// Resolve a named actor by path (direct resolution) @@ -177,7 +173,9 @@ impl ActorSystem { let transport = Http2RemoteTransport::new_named(self.transport.client(), target.addr, path.clone()); - let actor_id = ActorId::new(target.node_id, 0); + // For named actors, we don't have a specific ActorId until we resolve + // Use a placeholder ID (this will be replaced when the actor is actually accessed) + let actor_id = ActorId::generate(); Ok(ActorRef::remote(actor_id, target.addr, Arc::new(transport))) } @@ -259,9 +257,9 @@ impl ActorSystem { ActorAddress::Named { path, instance } => { self.resolve_named(path, instance.as_ref()).await } - ActorAddress::Global { node_id, actor_id } => { - let id = ActorId::new(*node_id, *actor_id); - self.actor_ref(&id).await + ActorAddress::Global { actor_id, .. } => { + // actor_id is already a full ActorId (u128) + self.actor_ref(actor_id).await } } } diff --git a/crates/pulsing-actor/src/system/spawn.rs b/crates/pulsing-actor/src/system/spawn.rs index 5fbe48ab5..313021a54 100644 --- a/crates/pulsing-actor/src/system/spawn.rs +++ b/crates/pulsing-actor/src/system/spawn.rs @@ -11,7 +11,6 @@ use crate::system::config::SpawnOptions; use crate::system::handle::{ActorStats, LocalActorHandle}; use crate::system::runtime::run_supervision_loop; use crate::system::ActorSystem; -use std::sync::atomic::Ordering; use std::sync::Arc; impl ActorSystem { @@ -42,7 +41,6 @@ impl ActorSystem { } let actor_id = self.next_actor_id(); - let local_id = actor_id.local_id(); let mailbox = Mailbox::with_capacity(self.mailbox_capacity(&options)); let (sender, receiver) = mailbox.split(); @@ -76,11 +74,11 @@ impl ActorSystem { actor_id, }; - self.local_actors.insert(local_id, handle); + self.local_actors.insert(actor_id, handle); // Register in name maps if let Some(ref name) = name_str { - self.actor_names.insert(name.clone(), local_id); + self.actor_names.insert(name.clone(), actor_id); self.named_actor_paths.insert(name.clone(), name.clone()); // Register with cluster if available @@ -97,16 +95,15 @@ impl ActorSystem { } } else { // Anonymous actor: use actor_id as key - self.actor_names.insert(actor_id.to_string(), local_id); + self.actor_names.insert(actor_id.to_string(), actor_id); } Ok(ActorRef::local(actor_id, sender)) } - /// Generate a new unique local actor ID + /// Generate a new unique actor ID using UUID pub(crate) fn next_actor_id(&self) -> ActorId { - let local_id = self.actor_id_counter.fetch_add(1, Ordering::Relaxed); - ActorId::new(self.node_id, local_id) + ActorId::generate() } fn mailbox_capacity(&self, options: &SpawnOptions) -> usize { diff --git a/crates/pulsing-actor/src/system_actor/messages.rs b/crates/pulsing-actor/src/system_actor/messages.rs index 39d07fc89..450719fd3 100644 --- a/crates/pulsing-actor/src/system_actor/messages.rs +++ b/crates/pulsing-actor/src/system_actor/messages.rs @@ -2,6 +2,26 @@ use serde::{Deserialize, Serialize}; +/// Helper module for serializing u128 as string (JSON doesn't support 128-bit integers) +mod u128_as_string { + use serde::{self, Deserialize, Deserializer, Serializer}; + + pub fn serialize(value: &u128, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(&value.to_string()) + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + s.parse().map_err(serde::de::Error::custom) + } +} + /// SystemActor request messages #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type")] @@ -79,11 +99,13 @@ pub enum SystemResponse { /// Actor created successfully ActorCreated { /// Actor ID - actor_id: u64, + #[serde(with = "u128_as_string")] + actor_id: u128, /// Actor name name: String, /// Node ID - node_id: u64, + #[serde(with = "u128_as_string")] + node_id: u128, /// Available methods list (for Python actors) #[serde(default)] methods: Vec, @@ -115,7 +137,8 @@ pub enum SystemResponse { /// Node info NodeInfo { /// Node ID - node_id: u64, + #[serde(with = "u128_as_string")] + node_id: u128, /// Address addr: String, /// Uptime in seconds @@ -135,7 +158,8 @@ pub enum SystemResponse { /// Pong response Pong { /// Node ID - node_id: u64, + #[serde(with = "u128_as_string")] + node_id: u128, /// Timestamp timestamp: u64, }, @@ -146,8 +170,8 @@ pub enum SystemResponse { pub struct ActorInfo { /// Actor name (also used as path for resolution) pub name: String, - /// Actor ID (local ID) - pub actor_id: u64, + /// Actor ID (full UUID) + pub actor_id: u128, /// Actor type pub actor_type: String, /// Uptime in seconds diff --git a/crates/pulsing-actor/src/system_actor/mod.rs b/crates/pulsing-actor/src/system_actor/mod.rs index 54a6f0bf5..d426f64c6 100644 --- a/crates/pulsing-actor/src/system_actor/mod.rs +++ b/crates/pulsing-actor/src/system_actor/mod.rs @@ -129,7 +129,7 @@ impl ActorRegistry { .iter() .map(|e| ActorInfo { name: e.key().clone(), - actor_id: e.actor_id.local_id(), + actor_id: e.actor_id.0, actor_type: e.actor_type.clone(), uptime_secs: e.created_at.elapsed().as_secs(), metadata: std::collections::HashMap::new(), // TODO: get from actor @@ -140,7 +140,7 @@ impl ActorRegistry { pub fn get_info(&self, name: &str) -> Option { self.actors.get(name).map(|e| ActorInfo { name: name.to_string(), - actor_id: e.actor_id.local_id(), + actor_id: e.actor_id.0, actor_type: e.actor_type.clone(), uptime_secs: e.created_at.elapsed().as_secs(), metadata: std::collections::HashMap::new(), // TODO: get from actor diff --git a/crates/pulsing-actor/src/transport/http2/mod.rs b/crates/pulsing-actor/src/transport/http2/mod.rs index c3cb78b7c..91ec9113c 100644 --- a/crates/pulsing-actor/src/transport/http2/mod.rs +++ b/crates/pulsing-actor/src/transport/http2/mod.rs @@ -274,7 +274,7 @@ impl Http2RemoteTransport { Self { client, remote_addr, - path: format!("/actors/{}", actor_id.local_id()), + path: format!("/actors/{}", actor_id), circuit_breaker: CircuitBreaker::new(), } } @@ -514,10 +514,12 @@ mod tests { fn test_http2_remote_transport_new_by_id() { let client = Arc::new(Http2Client::new(Http2Config::default())); let addr: SocketAddr = "127.0.0.1:8080".parse().unwrap(); - let actor_id = ActorId::local(42); + let actor_id = ActorId::generate(); let transport = Http2RemoteTransport::new_by_id(client, addr, actor_id); - assert_eq!(transport.path(), "/actors/42"); + // Path should be /actors/{uuid} where uuid is 32 hex chars + assert!(transport.path().starts_with("/actors/")); + assert_eq!(transport.path().len(), 8 + 32); // "/actors/" + 32 hex chars assert_eq!(transport.remote_addr(), addr); } diff --git a/crates/pulsing-actor/src/watch.rs b/crates/pulsing-actor/src/watch.rs index 4aa0c4d03..2cd97c8ac 100644 --- a/crates/pulsing-actor/src/watch.rs +++ b/crates/pulsing-actor/src/watch.rs @@ -284,15 +284,14 @@ impl Default for ActorLifecycle { #[cfg(test)] mod tests { use super::*; - use crate::actor::NodeId; #[tokio::test] async fn test_watch_unwatch() { let lifecycle = ActorLifecycle::new(); - let watcher1 = ActorId::new(NodeId::generate(), 1); - let watcher2 = ActorId::new(NodeId::generate(), 2); - let target1 = ActorId::new(NodeId::generate(), 10); + let watcher1 = ActorId::generate(); + let watcher2 = ActorId::generate(); + let target1 = ActorId::generate(); // Add watches lifecycle.watch(&watcher1, &target1).await; @@ -313,10 +312,10 @@ mod tests { async fn test_remove_actor() { let lifecycle = ActorLifecycle::new(); - let watcher1 = ActorId::new(NodeId::generate(), 1); - let watcher2 = ActorId::new(NodeId::generate(), 2); - let target1 = ActorId::new(NodeId::generate(), 10); - let target2 = ActorId::new(NodeId::generate(), 11); + let watcher1 = ActorId::generate(); + let watcher2 = ActorId::generate(); + let target1 = ActorId::generate(); + let target2 = ActorId::generate(); // Setup: watcher1 watches target1 and target2 lifecycle.watch(&watcher1, &target1).await; @@ -339,10 +338,10 @@ mod tests { async fn test_clear() { let lifecycle = ActorLifecycle::new(); - let w1 = ActorId::new(NodeId::generate(), 1); - let w2 = ActorId::new(NodeId::generate(), 2); - let t1 = ActorId::new(NodeId::generate(), 10); - let t2 = ActorId::new(NodeId::generate(), 11); + let w1 = ActorId::generate(); + let w2 = ActorId::generate(); + let t1 = ActorId::generate(); + let t2 = ActorId::generate(); lifecycle.watch(&w1, &t1).await; lifecycle.watch(&w2, &t2).await; @@ -358,9 +357,9 @@ mod tests { async fn test_notify_watchers() { let lifecycle = ActorLifecycle::new(); - let watcher1 = ActorId::new(NodeId::generate(), 1); - let watcher2 = ActorId::new(NodeId::generate(), 2); - let target1 = ActorId::new(NodeId::generate(), 10); + let watcher1 = ActorId::generate(); + let watcher2 = ActorId::generate(); + let target1 = ActorId::generate(); lifecycle.watch(&watcher1, &target1).await; lifecycle.watch(&watcher2, &target1).await; diff --git a/crates/pulsing-actor/tests/address_tests.rs b/crates/pulsing-actor/tests/address_tests.rs index 9d0eafa9c..321090975 100644 --- a/crates/pulsing-actor/tests/address_tests.rs +++ b/crates/pulsing-actor/tests/address_tests.rs @@ -1,6 +1,6 @@ //! Comprehensive tests for the actor addressing system -use pulsing_actor::actor::{ActorId, ActorPath, NodeId}; +use pulsing_actor::actor::{ActorId, ActorPath}; use pulsing_actor::prelude::*; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; @@ -92,11 +92,13 @@ mod actor_address_tests { #[test] fn test_address_parsing() { - // Test that addresses can be created and parsed - let node = NodeId::generate(); - let actor_id = ActorId::new(node, 123); - assert_eq!(actor_id.local_id(), 123); - assert_eq!(actor_id.node(), node); + // Test that ActorIds can be created + let actor_id = ActorId::generate(); + assert_ne!(actor_id.0, 0); + + // Test creating from specific value + let actor_id2 = ActorId::new(12345); + assert_eq!(actor_id2.0, 12345); } } diff --git a/crates/pulsing-actor/tests/cluster/member_tests.rs b/crates/pulsing-actor/tests/cluster/member_tests.rs index c36f76dcc..1c7eccf16 100644 --- a/crates/pulsing-actor/tests/cluster/member_tests.rs +++ b/crates/pulsing-actor/tests/cluster/member_tests.rs @@ -244,7 +244,7 @@ fn test_member_info_hash() { #[test] fn test_actor_location() { - let actor_id = ActorId::local(1); + let actor_id = ActorId::generate(); let node_id = NodeId::generate(); let location = ActorLocation::new(actor_id, node_id); @@ -282,7 +282,7 @@ fn test_failure_info() { #[test] fn test_named_actor_instance_new() { let node_id = NodeId::generate(); - let actor_id = ActorId::local(42); + let actor_id = ActorId::generate(); let instance = NamedActorInstance::new(node_id, actor_id); @@ -294,7 +294,7 @@ fn test_named_actor_instance_new() { #[test] fn test_named_actor_instance_with_metadata() { let node_id = NodeId::generate(); - let actor_id = ActorId::local(42); + let actor_id = ActorId::generate(); let mut metadata = HashMap::new(); metadata.insert("class".to_string(), "Counter".to_string()); metadata.insert("module".to_string(), "__main__".to_string()); @@ -348,7 +348,7 @@ fn test_named_actor_info_with_instance() { fn test_named_actor_info_with_full_instance() { let path = ActorPath::new("actors/counter").unwrap(); let node_id = NodeId::generate(); - let actor_id = ActorId::local(42); + let actor_id = ActorId::generate(); let mut metadata = HashMap::new(); metadata.insert("class".to_string(), "Counter".to_string()); @@ -400,8 +400,8 @@ fn test_named_actor_info_add_full_instance() { let path = ActorPath::new("actors/counter").unwrap(); let node1 = NodeId::generate(); let node2 = NodeId::generate(); - let actor_id1 = ActorId::local(1); - let actor_id2 = ActorId::local(2); + let actor_id1 = ActorId::generate(); + let actor_id2 = ActorId::generate(); let mut info = NamedActorInfo::new(path); @@ -471,8 +471,8 @@ fn test_named_actor_info_merge_with_full_instances() { let path = ActorPath::new("actors/counter").unwrap(); let node1 = NodeId::generate(); let node2 = NodeId::generate(); - let actor_id1 = ActorId::local(1); - let actor_id2 = ActorId::local(2); + let actor_id1 = ActorId::generate(); + let actor_id2 = ActorId::generate(); let mut metadata1 = HashMap::new(); metadata1.insert("class".to_string(), "Counter".to_string()); diff --git a/crates/pulsing-actor/tests/cluster_tests.rs b/crates/pulsing-actor/tests/cluster_tests.rs index 0b7f98d35..c64bd6ec0 100644 --- a/crates/pulsing-actor/tests/cluster_tests.rs +++ b/crates/pulsing-actor/tests/cluster_tests.rs @@ -74,39 +74,40 @@ async fn test_system_with_specific_addr() { #[test] fn test_actor_id_creation() { - let node_id = NodeId::generate(); - let actor_id = ActorId::new(node_id, 123); + // Test generating a new ActorId + let actor_id = ActorId::generate(); + assert_ne!(actor_id.0, 0); - assert_eq!(actor_id.node(), node_id); - assert_eq!(actor_id.local_id(), 123); + // Test creating from specific value + let actor_id2 = ActorId::new(12345); + assert_eq!(actor_id2.0, 12345); } #[test] -fn test_actor_id_local() { - let actor_id = ActorId::local(456); +fn test_actor_id_uniqueness() { + // UUID-based IDs should be unique + let id1 = ActorId::generate(); + let id2 = ActorId::generate(); - assert!(actor_id.node().is_local()); - assert_eq!(actor_id.local_id(), 456); + assert_ne!(id1, id2); } #[test] fn test_actor_id_equality() { - let node_id = NodeId::generate(); - let id1 = ActorId::new(node_id, 1); - let id2 = ActorId::new(node_id, 1); + // Same value should be equal + let id1 = ActorId::new(12345); + let id2 = ActorId::new(12345); assert_eq!(id1, id2); } #[test] fn test_actor_id_display() { - let node_id = NodeId::generate(); - let actor_id = ActorId::new(node_id, 42); + let actor_id = ActorId::generate(); let display = format!("{}", actor_id); - // Display format is "node_id:local_id" - assert!(display.contains("42")); - assert!(display.contains(&node_id.0.to_string())); + // Display format is UUID (32 hex characters) + assert_eq!(display.len(), 32); } // ============================================================================ diff --git a/crates/pulsing-actor/tests/http2_transport_tests.rs b/crates/pulsing-actor/tests/http2_transport_tests.rs index eb95c365e..864ac521f 100644 --- a/crates/pulsing-actor/tests/http2_transport_tests.rs +++ b/crates/pulsing-actor/tests/http2_transport_tests.rs @@ -396,7 +396,7 @@ async fn test_http2_remote_transport_ask() { // Use the RemoteTransport trait use pulsing_actor::actor::RemoteTransport; - let actor_id = ActorId::local(1); + let actor_id = ActorId::generate(); let response = transport .request(&actor_id, "TestType", b"payload".to_vec()) .await @@ -436,7 +436,7 @@ async fn test_http2_remote_transport_tell() { // Use the RemoteTransport trait use pulsing_actor::actor::RemoteTransport; - let actor_id = ActorId::local(2); + let actor_id = ActorId::generate(); transport .send(&actor_id, "FireMsg", b"data".to_vec()) .await @@ -478,7 +478,7 @@ async fn test_http2_remote_transport_named_path() { // Use the RemoteTransport trait use pulsing_actor::actor::RemoteTransport; - let actor_id = ActorId::local(3); + let actor_id = ActorId::generate(); let response = transport .request(&actor_id, "Inference", b"prompt".to_vec()) .await diff --git a/crates/pulsing-actor/tests/integration_tests.rs b/crates/pulsing-actor/tests/integration_tests.rs index 05c93ec52..4056a78b5 100644 --- a/crates/pulsing-actor/tests/integration_tests.rs +++ b/crates/pulsing-actor/tests/integration_tests.rs @@ -542,6 +542,7 @@ mod lifecycle_tests { mod addressing_tests { use super::*; + use pulsing_actor::actor::ActorId; #[tokio::test] async fn test_spawn_named_actor() { @@ -621,7 +622,7 @@ mod addressing_tests { .unwrap(); // Get the full address using the actual actor id - let addr = ActorAddress::local(actor_ref.id().local_id()); + let addr = ActorAddress::local(*actor_ref.id()); // Resolve let resolved_ref = ActorSystemOpsExt::resolve_address(&system, &addr) @@ -649,10 +650,8 @@ mod addressing_tests { .await .unwrap(); - // Resolve using local address (node_id = 0) with actual actor id - let addr = - ActorAddress::parse(&format!("actor://0/{}", actor_ref.id().local_id())).unwrap(); - assert!(addr.is_local()); + // Resolve using global address with actual actor id + let addr = ActorAddress::global(*actor_ref.id()); let resolved_ref = ActorSystemOpsExt::resolve_address(&system, &addr) .await @@ -722,19 +721,17 @@ mod addressing_tests { assert_eq!(addr.path().unwrap().namespace(), "services"); assert_eq!(addr.path().unwrap().name(), "api"); - // Named instance (node_id is now u64) + // Named instance (uses u128 node_id) let addr = ActorAddress::parse("actor:///services/api@123").unwrap(); assert!(addr.is_named()); assert_eq!(addr.node_id().map(|n| n.0), Some(123)); - // Global (node_id and actor_id are now u64) - let addr = ActorAddress::parse("actor://456/789").unwrap(); + // Global address with UUID format + let actor_id = ActorId::generate(); + let addr_str = format!("actor://{}", actor_id); + let addr = ActorAddress::parse(&addr_str).unwrap(); assert!(addr.is_global()); - assert_eq!(addr.actor_id(), Some(789)); - - // Local (node_id = 0) - let addr = ActorAddress::parse("actor://0/100").unwrap(); - assert!(addr.is_local()); + assert_eq!(addr.actor_id(), Some(actor_id)); } #[tokio::test] diff --git a/crates/pulsing-actor/tests/multi_node_tests.rs b/crates/pulsing-actor/tests/multi_node_tests.rs index 579b49882..d38a44f49 100644 --- a/crates/pulsing-actor/tests/multi_node_tests.rs +++ b/crates/pulsing-actor/tests/multi_node_tests.rs @@ -464,8 +464,8 @@ mod edge_case_tests { let ref1 = system1.spawn_named("test/shared-name", Echo).await.unwrap(); let ref2 = system2.spawn_named("test/shared-name", Echo).await.unwrap(); - // They should have different full IDs (different node IDs) - assert_ne!(ref1.id().node(), ref2.id().node()); + // With UUID-based IDs, each actor has a unique ID + assert_ne!(ref1.id(), ref2.id()); // Both should be local actors on their respective systems assert!(ref1.is_local()); assert!(ref2.is_local()); @@ -686,12 +686,12 @@ mod addressing_multi_node_tests { } #[tokio::test] - async fn test_resolve_global_address_cross_node() { + async fn test_resolve_named_actor_cross_node() { // Node 1 let config1 = create_cluster_config(20087); let system1 = ActorSystem::new(config1).await.unwrap(); let gossip1_addr = system1.addr(); - let node1_id = *system1.node_id(); + let _node1_id = *system1.node_id(); // Node 2 joins let mut config2 = create_cluster_config(20088); @@ -701,14 +701,16 @@ mod addressing_multi_node_tests { // Wait for cluster formation tokio::time::sleep(Duration::from_millis(500)).await; - // Create regular actor on node 1 - let actor_ref = system1 + // Create named actor on node 1 + let _actor_ref = system1 .spawn_named("test/remote_worker", Echo) .await .unwrap(); - // Node 2 resolves using global address with retries - let addr = ActorAddress::global(node1_id, actor_ref.id().local_id()); + // Node 2 resolves using named address with retries + // Note: With UUID-based ActorIds, we can no longer derive node from ActorId. + // Use named resolution instead for cross-node actor lookup. + let addr = ActorAddress::named(ActorPath::new("test/remote_worker").unwrap()); let mut resolved_ref = None; for attempt in 1..=15 { match ActorSystemOpsExt::resolve_address(&system2, &addr).await { @@ -720,14 +722,14 @@ mod addressing_multi_node_tests { tokio::time::sleep(Duration::from_millis(200)).await; } Err(e) => { - panic!("Failed to resolve global address after 15 attempts: {}", e); + panic!("Failed to resolve named address after 15 attempts: {}", e); } } } - let resolved_ref = resolved_ref.expect("Should resolve global address"); + let resolved_ref = resolved_ref.expect("Should resolve named address"); - // Should be a remote reference + // Should be a remote reference from node 2's perspective assert!(!resolved_ref.is_local()); // Call should work diff --git a/crates/pulsing-actor/tests/system_actor_tests.rs b/crates/pulsing-actor/tests/system_actor_tests.rs index ff6e3b781..d02b7c47d 100644 --- a/crates/pulsing-actor/tests/system_actor_tests.rs +++ b/crates/pulsing-actor/tests/system_actor_tests.rs @@ -426,7 +426,7 @@ async fn test_system_actor_uptime_increases() { #[test] fn test_actor_registry() { let registry = ActorRegistry::new(); - let actor_id = ActorId::local(1); + let actor_id = ActorId::generate(); registry.register("test", actor_id, "TestActor"); assert!(registry.contains("test")); @@ -444,8 +444,8 @@ fn test_actor_registry() { fn test_actor_registry_list_all() { let registry = ActorRegistry::new(); - registry.register("actor1", ActorId::local(1), "TypeA"); - registry.register("actor2", ActorId::local(2), "TypeB"); + registry.register("actor1", ActorId::generate(), "TypeA"); + registry.register("actor2", ActorId::generate(), "TypeB"); let actors = registry.list_all(); assert_eq!(actors.len(), 2); diff --git a/crates/pulsing-py/Cargo.toml b/crates/pulsing-py/Cargo.toml index b1792e6ff..6bb690a3c 100644 --- a/crates/pulsing-py/Cargo.toml +++ b/crates/pulsing-py/Cargo.toml @@ -27,6 +27,7 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true } reqwest = { workspace = true } pythonize = "0.23" +uuid = { workspace = true } [dependencies.pyo3] version = "0.23.4" diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 6bc9f28c7..55d120d5e 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -38,10 +38,39 @@ impl PyNodeId { } } + /// Create a new NodeId from a u128 value or string UUID #[new] - fn new(id: u64) -> Self { - Self { - inner: NodeId::new(id), + #[pyo3(signature = (id=None))] + fn new(id: Option<&Bound<'_, pyo3::PyAny>>) -> PyResult { + match id { + None => Ok(Self { + inner: NodeId::generate(), + }), + Some(py_id) => { + // Try to extract as string first (UUID format) + if let Ok(s) = py_id.extract::() { + if let Ok(uuid) = uuid::Uuid::parse_str(&s) { + return Ok(Self { + inner: NodeId::new(uuid.as_u128()), + }); + } + } + // Try as integer + if let Ok(n) = py_id.extract::() { + return Ok(Self { + inner: NodeId::new(n), + }); + } + // Try as smaller integer + if let Ok(n) = py_id.extract::() { + return Ok(Self { + inner: NodeId::new(n as u128), + }); + } + Err(PyValueError::new_err( + "NodeId must be a UUID string or integer", + )) + } } } @@ -52,11 +81,17 @@ impl PyNodeId { } } + /// Get the raw u128 value #[getter] - fn id(&self) -> u64 { + fn id(&self) -> u128 { self.inner.0 } + /// Get the UUID string representation + fn uuid(&self) -> String { + self.inner.to_string() + } + fn is_local(&self) -> bool { self.inner.is_local() } @@ -79,33 +114,59 @@ pub struct PyActorId { #[pymethods] impl PyActorId { + /// Create a new ActorId from a u128 value, string UUID, or generate a new one #[new] - #[pyo3(signature = (local_id, node=None))] - fn new(local_id: u64, node: Option) -> Self { - let inner = match node { - Some(n) => ActorId::new(n.inner, local_id), - None => ActorId::local(local_id), - }; - Self { inner } + #[pyo3(signature = (id=None))] + fn new(id: Option<&Bound<'_, pyo3::PyAny>>) -> PyResult { + match id { + None => Ok(Self { + inner: ActorId::generate(), + }), + Some(py_id) => { + // Try to extract as string first (UUID format) + if let Ok(s) = py_id.extract::() { + if let Ok(uuid) = uuid::Uuid::parse_str(&s) { + return Ok(Self { + inner: ActorId::new(uuid.as_u128()), + }); + } + } + // Try as integer + if let Ok(n) = py_id.extract::() { + return Ok(Self { + inner: ActorId::new(n), + }); + } + // Try as smaller integer + if let Ok(n) = py_id.extract::() { + return Ok(Self { + inner: ActorId::new(n as u128), + }); + } + Err(PyValueError::new_err( + "ActorId must be a UUID string or integer", + )) + } + } } + /// Generate a new random ActorId #[staticmethod] - fn local(local_id: u64) -> Self { + fn generate() -> Self { Self { - inner: ActorId::local(local_id), + inner: ActorId::generate(), } } + /// Get the raw u128 value #[getter] - fn local_id(&self) -> u64 { - self.inner.local_id() + fn id(&self) -> u128 { + self.inner.0 } - #[getter] - fn node(&self) -> PyNodeId { - PyNodeId { - inner: self.inner.node(), - } + /// Get the UUID string representation + fn uuid(&self) -> String { + self.inner.to_string() } fn __str__(&self) -> String { @@ -113,11 +174,7 @@ impl PyActorId { } fn __repr__(&self) -> String { - format!( - "ActorId(local_id={}, node={})", - self.inner.local_id(), - self.inner.node() - ) + format!("ActorId({})", self.inner.0) } fn __hash__(&self) -> u64 { @@ -131,31 +188,25 @@ impl PyActorId { self.inner == other.inner } - /// Parse ActorId from string format "node_id:local_id" + /// Parse ActorId from string (UUID format) #[staticmethod] fn from_str(s: &str) -> PyResult { - let parts: Vec<&str> = s.split(':').collect(); - if parts.len() != 2 { - return Err(pyo3::exceptions::PyValueError::new_err(format!( - "Invalid ActorId format: '{}'. Expected 'node_id:local_id'", - s - ))); + // Try to parse as UUID + if let Ok(uuid) = uuid::Uuid::parse_str(s) { + return Ok(Self { + inner: ActorId::new(uuid.as_u128()), + }); } - let node_id: u64 = parts[0].parse().map_err(|_| { - pyo3::exceptions::PyValueError::new_err(format!( - "Invalid node_id in ActorId: '{}'", - parts[0] - )) - })?; - let local_id: u64 = parts[1].parse().map_err(|_| { - pyo3::exceptions::PyValueError::new_err(format!( - "Invalid local_id in ActorId: '{}'", - parts[1] - )) - })?; - Ok(Self { - inner: ActorId::new(NodeId::new(node_id), local_id), - }) + // Try to parse as simple integer + if let Ok(n) = s.parse::() { + return Ok(Self { + inner: ActorId::new(n), + }); + } + Err(pyo3::exceptions::PyValueError::new_err(format!( + "Invalid ActorId format: '{}'. Expected UUID string or integer", + s + ))) } } @@ -1314,11 +1365,13 @@ impl PyActorSystem { pyo3_async_runtimes::tokio::future_into_py(py, async move { let members = system.members().await; + // Return all fields as strings for safe JSON serialization let result: Vec> = members .into_iter() .map(|m| { let mut map = std::collections::HashMap::new(); - map.insert("node_id".to_string(), m.node_id.to_string()); + // Use string representation to avoid JSON integer overflow + map.insert("node_id".to_string(), m.node_id.0.to_string()); map.insert("addr".to_string(), m.addr.to_string()); map.insert("status".to_string(), format!("{:?}", m.status)); map @@ -1470,7 +1523,7 @@ impl PyActorSystem { &self, py: Python<'py>, name: String, - node_id: Option, + node_id: Option, ) -> PyResult> { let system = self.inner.clone(); @@ -1502,7 +1555,7 @@ impl PyActorSystem { &self, py: Python<'py>, name: String, - node_id: Option, + node_id: Option, ) -> PyResult> { self.resolve_named(py, name, node_id) } @@ -1536,7 +1589,7 @@ impl PyActorSystem { } /// Get remote SystemActor reference (for remote nodes) - fn remote_system<'py>(&self, py: Python<'py>, node_id: u64) -> PyResult> { + fn remote_system<'py>(&self, py: Python<'py>, node_id: u128) -> PyResult> { let system = self.inner.clone(); pyo3_async_runtimes::tokio::future_into_py(py, async move { diff --git a/python/pulsing/actor/remote.py b/python/pulsing/actor/remote.py index 36ce248c0..aac8c9ba0 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/actor/remote.py @@ -515,8 +515,9 @@ def factory(): return Message.from_json( "Created", { - "actor_id": actor_ref.actor_id.local_id, - "node_id": self.system.node_id.id, + # actor_id is now a UUID (u128), transmit as string for JSON + "actor_id": str(actor_ref.actor_id.id), + "node_id": str(self.system.node_id.id), "methods": method_names, }, ) @@ -698,10 +699,11 @@ async def remote( public = name is not None members = await system.members() - local_id = system.node_id.id + # members["node_id"] is string, convert local_id to string for comparison + local_id = str(system.node_id.id) - # Filter out remote nodes - remote_nodes = [m for m in members if int(m["node_id"]) != local_id] + # Filter out remote nodes (node_id is string) + remote_nodes = [m for m in members if m["node_id"] != local_id] if not remote_nodes: # No remote nodes, fallback to local creation @@ -710,6 +712,7 @@ async def remote( # Randomly select one target = random.choice(remote_nodes) + # Convert back to int for resolve_named target_id = int(target["node_id"]) # Get target node's Python actor creation service @@ -747,9 +750,13 @@ async def remote( raise RuntimeError(f"Remote create failed: {data.get('error')}") # Build remote ActorRef - from pulsing._core import ActorId, NodeId + from pulsing._core import ActorId - remote_id = ActorId(data["actor_id"], NodeId(data["node_id"])) + # actor_id is now a UUID (u128), may be transmitted as string + actor_id = data["actor_id"] + if isinstance(actor_id, str): + actor_id = int(actor_id) + remote_id = ActorId(actor_id) actor_ref = await system.actor_ref(remote_id) return ActorProxy( diff --git a/python/pulsing/actors/load_stream.py b/python/pulsing/actors/load_stream.py index ffa91bf54..293a357c1 100644 --- a/python/pulsing/actors/load_stream.py +++ b/python/pulsing/actors/load_stream.py @@ -228,10 +228,9 @@ async def _subscribe_worker(self, node_id: str): return try: # Use resolve_named instead of unbound get_actor_ref - # node_id needs to be converted from string to int - nid_int = int(node_id) + # node_id is string from members(), convert to int for resolve_named worker_ref = await self._system.resolve_named( - self._worker_name, node_id=nid_int + self._worker_name, node_id=int(node_id) ) if worker_ref: self._worker_refs[node_id] = worker_ref diff --git a/python/pulsing/actors/scheduler.py b/python/pulsing/actors/scheduler.py index bd4ccee18..751cb2a53 100644 --- a/python/pulsing/actors/scheduler.py +++ b/python/pulsing/actors/scheduler.py @@ -63,11 +63,10 @@ async def get_healthy_worker_count(self) -> int: workers = await self.get_available_workers() return sum(1 for w in workers if w.get("status") == "Alive") - async def _resolve_worker(self, node_id: str | None = None): + async def _resolve_worker(self, node_id: int | None = None): try: - # node_id is serialized as string in MemberInfo, need to convert back to int to match resolve_named - nid_int = int(node_id) if node_id else None - return await self._system.resolve_named(self._worker_name, node_id=nid_int) + # node_id is now u128 integer from members() + return await self._system.resolve_named(self._worker_name, node_id=node_id) except Exception: return None diff --git a/python/pulsing/queue/manager.py b/python/pulsing/queue/manager.py index 016f693ef..aef75a766 100644 --- a/python/pulsing/queue/manager.py +++ b/python/pulsing/queue/manager.py @@ -45,13 +45,14 @@ def _compute_owner(bucket_key: str, nodes: list[dict]) -> int | None: node_id = node.get("node_id") if node_id is None: continue - node_id = int(node_id) + # node_id is u128 integer, convert to string for consistent hashing + node_id_str = str(node_id) # Combine key and node_id to calculate hash score - combined = f"{bucket_key}:{node_id}" + combined = f"{bucket_key}:{node_id_str}" score = int(hashlib.md5(combined.encode()).hexdigest(), 16) if score > best_score: best_score = score - best_node_id = node_id + best_node_id = node_id # Keep as integer return best_node_id @@ -217,9 +218,10 @@ async def _handle_message(self, msg: Message) -> Message | None: bucket_key = self._bucket_key(topic, bucket_id) members = await self._refresh_members() owner_node_id = _compute_owner(bucket_key, members) - local_node_id = self.system.node_id.id + # node_id.id returns u128, convert to string for comparison with members + local_node_id = str(self.system.node_id.id) - # Determine if belongs to this node + # Determine if belongs to this node (string comparison) if owner_node_id is None or owner_node_id == local_node_id: # This node is responsible, create/return bucket bucket_ref = await self._get_or_create_bucket( @@ -231,9 +233,9 @@ async def _handle_message(self, msg: Message) -> Message | None: "_type": "BucketReady", # Fallback: msg_type may be lost across nodes "topic": topic, "bucket_id": bucket_id, - "actor_id": bucket_ref.actor_id.local_id, - # Use hex string to transmit node_id, avoid JSON big integer precision loss - "node_id_hex": hex(local_node_id), + # Use string for JSON serialization of large u128 integers + "actor_id": str(bucket_ref.actor_id.id), + "node_id": str(local_node_id), }, ) else: @@ -241,9 +243,8 @@ async def _handle_message(self, msg: Message) -> Message | None: # Find owner node address owner_addr = None for m in members: - # node_id might be string, convert to int for comparison m_node_id = m.get("node_id") - if m_node_id is not None and int(m_node_id) == owner_node_id: + if m_node_id is not None and m_node_id == owner_node_id: owner_addr = m.get("addr") break @@ -253,8 +254,8 @@ async def _handle_message(self, msg: Message) -> Message | None: "_type": "Redirect", # Fallback: msg_type may be lost across nodes "topic": topic, "bucket_id": bucket_id, - # Use hex string to transmit node_id, avoid JSON big integer precision loss - "owner_node_id_hex": hex(owner_node_id), + # Use string for JSON serialization of large integers + "owner_node_id": str(owner_node_id), "owner_addr": owner_addr, }, ) @@ -269,7 +270,8 @@ async def _handle_message(self, msg: Message) -> Message | None: topic_key = self._topic_key(topic_name) members = await self._refresh_members() owner_node_id = _compute_owner(topic_key, members) - local_node_id = self.system.node_id.id + # node_id.id returns u128, convert to string for comparison with members + local_node_id = str(self.system.node_id.id) if owner_node_id is None or owner_node_id == local_node_id: # This node is responsible, create/return topic broker @@ -279,8 +281,9 @@ async def _handle_message(self, msg: Message) -> Message | None: { "_type": "TopicReady", "topic": topic_name, - "actor_id": broker_ref.actor_id.local_id, - "node_id_hex": hex(local_node_id), + # Use string for JSON serialization of large u128 integers + "actor_id": str(broker_ref.actor_id.id), + "node_id": str(local_node_id), }, ) else: @@ -288,7 +291,7 @@ async def _handle_message(self, msg: Message) -> Message | None: owner_addr = None for m in members: m_node_id = m.get("node_id") - if m_node_id is not None and int(m_node_id) == owner_node_id: + if m_node_id is not None and m_node_id == owner_node_id: owner_addr = m.get("addr") break @@ -297,7 +300,8 @@ async def _handle_message(self, msg: Message) -> Message | None: { "_type": "Redirect", "topic": topic_name, - "owner_node_id_hex": hex(owner_node_id), + # Use string for JSON serialization of large integers + "owner_node_id": str(owner_node_id), "owner_addr": owner_addr, }, ) @@ -319,7 +323,8 @@ async def _handle_message(self, msg: Message) -> Message | None: return Message.from_json( "Stats", { - "node_id": self.system.node_id.id, + # Use string for JSON serialization of large u128 integers + "node_id": str(self.system.node_id.id), "bucket_count": len(self._buckets), "topic_count": len(self._topics), "buckets": [ @@ -435,17 +440,18 @@ async def get_bucket_ref( if msg_type == "BucketReady": # Successfully got bucket actor_id = resp_data["actor_id"] - # node_id transmitted as hex string, convert to int - node_id = int(resp_data["node_id_hex"], 16) + # actor_id is now a UUID (u128), transmitted as string + if isinstance(actor_id, str): + actor_id = int(actor_id) - bucket_actor_id = ActorId(actor_id, NodeId(node_id)) + bucket_actor_id = ActorId(actor_id) return await system.actor_ref(bucket_actor_id) elif msg_type == "Redirect": # Need to redirect to other node - # owner_node_id transmitted as hex string, convert to int - hex_str = resp_data.get("owner_node_id_hex") - owner_node_id = int(hex_str, 16) + # owner_node_id transmitted as string, convert to int + owner_node_id_str = resp_data.get("owner_node_id") + owner_node_id = int(owner_node_id_str) owner_addr = resp_data.get("owner_addr") logger.debug( @@ -517,12 +523,15 @@ async def get_topic_broker( if msg_type == "TopicReady": actor_id = resp_data["actor_id"] - node_id = int(resp_data["node_id_hex"], 16) - broker_actor_id = ActorId(actor_id, NodeId(node_id)) + # actor_id is now a UUID (u128), transmitted as string + if isinstance(actor_id, str): + actor_id = int(actor_id) + broker_actor_id = ActorId(actor_id) return await system.actor_ref(broker_actor_id) elif msg_type == "Redirect": - owner_node_id = int(resp_data["owner_node_id_hex"], 16) + # owner_node_id transmitted as string, convert to int + owner_node_id = int(resp_data["owner_node_id"]) logger.debug(f"Redirecting topic {topic} to node {owner_node_id}") diff --git a/python/pulsing/topic/broker.py b/python/pulsing/topic/broker.py index 31ac886ac..59d20d1e2 100644 --- a/python/pulsing/topic/broker.py +++ b/python/pulsing/topic/broker.py @@ -84,7 +84,9 @@ async def _handle(self, msg: Message) -> Message | None: async def _subscribe(self, data: dict) -> Message: subscriber_id = data.get("subscriber_id") actor_name = data.get("actor_name") - node_id = data.get("node_id") + node_id_raw = data.get("node_id") + # node_id comes as string from JSON, convert to int for resolve_named + node_id = int(node_id_raw) if node_id_raw else None if not subscriber_id or not actor_name: return Message.from_json( diff --git a/python/pulsing/topic/topic.py b/python/pulsing/topic/topic.py index fcca66739..ba1bf5335 100644 --- a/python/pulsing/topic/topic.py +++ b/python/pulsing/topic/topic.py @@ -245,7 +245,8 @@ async def start(self) -> None: { "subscriber_id": self._reader_id, "actor_name": actor_name, - "node_id": self._system.node_id.id, + # Use string for JSON serialization of large u128 integers + "node_id": str(self._system.node_id.id), }, ) ) diff --git a/tests/python/test_system_actor.py b/tests/python/test_system_actor.py index 5d602cbf6..12e5cb44e 100644 --- a/tests/python/test_system_actor.py +++ b/tests/python/test_system_actor.py @@ -83,7 +83,8 @@ async def test_ping_local(system): assert result["type"] == "Pong" assert "node_id" in result assert "timestamp" in result - assert result["node_id"] == system.node_id.id + # node_id is serialized as string in JSON for u128 precision + assert int(result["node_id"]) == system.node_id.id @pytest.mark.asyncio @@ -95,7 +96,8 @@ async def test_ping_direct_message(system): data = resp.to_json() assert data["type"] == "Pong" - assert data["node_id"] == system.node_id.id + # node_id is serialized as string in JSON for u128 precision + assert int(data["node_id"]) == system.node_id.id # ============================================================================ @@ -137,7 +139,8 @@ async def test_get_node_info(system): result = await get_node_info(system) assert result["type"] == "NodeInfo" - assert result["node_id"] == system.node_id.id + # node_id is serialized as string in JSON for u128 precision + assert int(result["node_id"]) == system.node_id.id assert "addr" in result assert "uptime_secs" in result @@ -335,7 +338,8 @@ async def test_concurrent_ping_requests(system): for result in results: assert result["type"] == "Pong" - assert result["node_id"] == system.node_id.id + # node_id is serialized as string in JSON for u128 precision + assert int(result["node_id"]) == system.node_id.id @pytest.mark.asyncio From 509bfaccabb5ec9928eff4ecc831efff4435aa43 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 22:09:21 +0800 Subject: [PATCH 5/7] Enhance actor proxies and refactor message handling - Introduced SystemActorProxy and PythonActorServiceProxy for direct method calls, improving interaction with system and service actors. - Refactored BucketStorage and TopicBroker to utilize remote method support, streamlining actor communication. - Updated tests to validate new proxy methods and ensure proper functionality across various scenarios. - Enhanced message serialization and error handling for improved robustness and clarity in actor interactions. --- crates/pulsing-py/src/actor.rs | 15 +- python/pulsing/actor/__init__.py | 6 + python/pulsing/actor/remote.py | 214 ++++++++++++++++++--- python/pulsing/queue/manager.py | 42 ++-- python/pulsing/queue/queue.py | 74 +++----- python/pulsing/queue/storage.py | 147 +++++++------- python/pulsing/topic/__init__.py | 2 + python/pulsing/topic/broker.py | 254 ++++++++++++------------- python/pulsing/topic/topic.py | 102 ++++++---- tests/python/test_queue.py | 79 ++++++-- tests/python/test_queue_backends.py | 74 ++++---- tests/python/test_system_actor.py | 284 ++++++++++------------------ tests/python/test_topic.py | 36 +--- 13 files changed, 727 insertions(+), 602 deletions(-) diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 55d120d5e..461e4af86 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -1411,9 +1411,10 @@ impl PyActorSystem { .into_iter() .map(|(member, instance_opt)| { let mut map = std::collections::HashMap::new(); + // Use decimal string for node_id to match members() format map.insert( "node_id".to_string(), - serde_json::Value::String(member.node_id.to_string()), + serde_json::Value::String(member.node_id.0.to_string()), ); map.insert( "addr".to_string(), @@ -1426,9 +1427,10 @@ impl PyActorSystem { // Add detailed instance info if available if let Some(inst) = instance_opt { + // Use decimal string for actor_id to match other APIs map.insert( "actor_id".to_string(), - serde_json::Value::String(inst.actor_id.to_string()), + serde_json::Value::String(inst.actor_id.0.to_string()), ); // Add metadata fields for (k, v) in inst.metadata { @@ -1471,11 +1473,11 @@ impl PyActorSystem { info.instance_count(), )), ); - // Convert instance_nodes (HashSet) to list of node IDs as strings + // Convert instance_nodes (HashSet) to list of node IDs as decimal strings let instances: Vec = info .instance_nodes .iter() - .map(|id| serde_json::Value::String(id.to_string())) + .map(|id| serde_json::Value::String(id.0.to_string())) .collect(); map.insert("instances".to_string(), serde_json::Value::Array(instances)); @@ -1485,13 +1487,14 @@ impl PyActorSystem { .iter() .map(|(node_id, inst)| { let mut inst_map = serde_json::Map::new(); + // Use decimal string to match members() format inst_map.insert( "node_id".to_string(), - serde_json::Value::String(node_id.to_string()), + serde_json::Value::String(node_id.0.to_string()), ); inst_map.insert( "actor_id".to_string(), - serde_json::Value::String(inst.actor_id.to_string()), + serde_json::Value::String(inst.actor_id.0.to_string()), ); // Add metadata for (k, v) in &inst.metadata { diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/actor/__init__.py index 61007d7c2..15041bd29 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/actor/__init__.py @@ -187,8 +187,12 @@ async def tell_with_timeout( ActorClass, ActorProxy, PythonActorService, + PythonActorServiceProxy, + SystemActorProxy, get_metrics, get_node_info, + get_python_actor_service, + get_system_actor, health_check, list_actors, ping, @@ -206,6 +210,7 @@ async def tell_with_timeout( "remote", "resolve", "get_system", + "get_system_actor", "is_initialized", # Minimal core types commonly used in docs/examples "Actor", @@ -216,6 +221,7 @@ async def tell_with_timeout( "ActorRef", "ActorId", "ActorProxy", + "SystemActorProxy", # Service (for actor_system function) "PythonActorService", "PYTHON_ACTOR_SERVICE_NAME", diff --git a/python/pulsing/actor/remote.py b/python/pulsing/actor/remote.py index aac8c9ba0..cd621ca79 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/actor/remote.py @@ -876,44 +876,202 @@ def wrapper(cls): # ============================================================================ +class SystemActorProxy: + """Proxy for SystemActor with direct method calls. + + Example: + system_proxy = await get_system_actor(system) + actors = await system_proxy.list_actors() + metrics = await system_proxy.get_metrics() + await system_proxy.ping() + """ + + def __init__(self, actor_ref: ActorRef): + self._ref = actor_ref + + @property + def ref(self) -> ActorRef: + """Get underlying ActorRef.""" + return self._ref + + async def _ask(self, msg_type: str) -> dict: + """Send SystemMessage and return response.""" + resp = await self._ref.ask( + Message.from_json("SystemMessage", {"type": msg_type}) + ) + return resp.to_json() + + async def list_actors(self) -> list[dict]: + """List all actors on this node.""" + data = await self._ask("ListActors") + if data.get("type") == "Error": + raise RuntimeError(data.get("message")) + return data.get("actors", []) + + async def get_metrics(self) -> dict: + """Get system metrics.""" + return await self._ask("GetMetrics") + + async def get_node_info(self) -> dict: + """Get node info.""" + return await self._ask("GetNodeInfo") + + async def health_check(self) -> dict: + """Health check.""" + return await self._ask("HealthCheck") + + async def ping(self) -> dict: + """Ping this node.""" + return await self._ask("Ping") + + +async def get_system_actor( + system: ActorSystem, node_id: int | None = None +) -> SystemActorProxy: + """Get SystemActorProxy for direct method calls. + + Args: + system: ActorSystem instance + node_id: Target node ID (None means local node) + + Returns: + SystemActorProxy with methods: list_actors(), get_metrics(), etc. + + Example: + sys = await get_system_actor(system) + actors = await sys.list_actors() + await sys.ping() + """ + if node_id is None: + actor_ref = await system.system() + else: + actor_ref = await system.remote_system(node_id) + return SystemActorProxy(actor_ref) + + +class PythonActorServiceProxy: + """Proxy for PythonActorService with direct method calls. + + Example: + service = await get_python_actor_service(system) + classes = await service.list_registry() + actor_ref = await service.create_actor("MyClass", name="my_actor") + """ + + def __init__(self, actor_ref: ActorRef): + self._ref = actor_ref + + @property + def ref(self) -> ActorRef: + """Get underlying ActorRef.""" + return self._ref + + async def list_registry(self) -> list[str]: + """List registered actor classes. + + Returns: + List of registered class names + """ + resp = await self._ref.ask(Message.from_json("ListRegistry", {})) + data = resp.to_json() + return data.get("classes", []) + + async def create_actor( + self, + class_name: str, + *args, + name: str | None = None, + public: bool = True, + restart_policy: str = "never", + max_restarts: int = 3, + min_backoff: float = 0.1, + max_backoff: float = 30.0, + **kwargs, + ) -> dict: + """Create a Python actor. + + Args: + class_name: Name of the registered actor class + *args: Positional arguments for the class constructor + name: Optional actor name + public: Whether the actor should be publicly resolvable + restart_policy: "never", "always", or "on_failure" + max_restarts: Maximum restart attempts + min_backoff: Minimum backoff time in seconds + max_backoff: Maximum backoff time in seconds + **kwargs: Keyword arguments for the class constructor + + Returns: + {"actor_id": "...", "node_id": "...", "actor_name": "..."} + + Raises: + RuntimeError: If creation fails + """ + resp = await self._ref.ask( + Message.from_json( + "CreateActor", + { + "class_name": class_name, + "actor_name": name, + "args": args, + "kwargs": kwargs, + "public": public, + "restart_policy": restart_policy, + "max_restarts": max_restarts, + "min_backoff": min_backoff, + "max_backoff": max_backoff, + }, + ) + ) + data = resp.to_json() + if resp.msg_type == "Error" or data.get("error"): + raise RuntimeError(data.get("error", "Unknown error")) + return data + + +async def get_python_actor_service( + system: ActorSystem, node_id: int | None = None +) -> PythonActorServiceProxy: + """Get PythonActorServiceProxy for direct method calls. + + Args: + system: ActorSystem instance + node_id: Target node ID (None means local node) + + Returns: + PythonActorServiceProxy with methods: list_registry(), create_actor() + + Example: + service = await get_python_actor_service(system) + classes = await service.list_registry() + """ + service_ref = await system.resolve_named(PYTHON_ACTOR_SERVICE_NAME, node_id=node_id) + return PythonActorServiceProxy(service_ref) + + +# Legacy helper functions (for backwards compatibility) async def list_actors(system: ActorSystem) -> list[dict]: """List all actors on the current node.""" - sys_actor = await system.system() - # SystemMessage uses serde tag format - resp = await sys_actor.ask( - Message.from_json("SystemMessage", {"type": "ListActors"}) - ) - data = resp.to_json() - if data.get("type") == "Error": - raise RuntimeError(data.get("message")) - return data.get("actors", []) + proxy = await get_system_actor(system) + return await proxy.list_actors() async def get_metrics(system: ActorSystem) -> dict: """Get system metrics.""" - sys_actor = await system.system() - resp = await sys_actor.ask( - Message.from_json("SystemMessage", {"type": "GetMetrics"}) - ) - return resp.to_json() + proxy = await get_system_actor(system) + return await proxy.get_metrics() async def get_node_info(system: ActorSystem) -> dict: """Get node info.""" - sys_actor = await system.system() - resp = await sys_actor.ask( - Message.from_json("SystemMessage", {"type": "GetNodeInfo"}) - ) - return resp.to_json() + proxy = await get_system_actor(system) + return await proxy.get_node_info() async def health_check(system: ActorSystem) -> dict: """Health check.""" - sys_actor = await system.system() - resp = await sys_actor.ask( - Message.from_json("SystemMessage", {"type": "HealthCheck"}) - ) - return resp.to_json() + proxy = await get_system_actor(system) + return await proxy.health_check() async def ping(system: ActorSystem, node_id: int | None = None) -> dict: @@ -923,12 +1081,8 @@ async def ping(system: ActorSystem, node_id: int | None = None) -> dict: system: ActorSystem instance node_id: Target node ID (None means local node) """ - if node_id is None: - sys_actor = await system.system() - else: - sys_actor = await system.remote_system(node_id) - resp = await sys_actor.ask(Message.from_json("SystemMessage", {"type": "Ping"})) - return resp.to_json() + proxy = await get_system_actor(system, node_id) + return await proxy.ping() async def resolve( diff --git a/python/pulsing/queue/manager.py b/python/pulsing/queue/manager.py index aef75a766..b0cbf8a95 100644 --- a/python/pulsing/queue/manager.py +++ b/python/pulsing/queue/manager.py @@ -3,12 +3,15 @@ import asyncio import hashlib import logging -from typing import Any +from typing import TYPE_CHECKING, Any from pulsing.actor import Actor, ActorId, ActorRef, ActorSystem, Message from .storage import BucketStorage +if TYPE_CHECKING: + from pulsing.actor.remote import ActorProxy + logger = logging.getLogger(__name__) # StorageManager fixed service name @@ -149,17 +152,18 @@ async def _get_or_create_bucket( self._buckets[key] = await self.system.resolve_named(actor_name) logger.debug(f"Resolved existing bucket: {actor_name}") except Exception: - # Create new, use specified backend or default backend - storage = BucketStorage( + # Create new using BucketStorage.local() for proper @remote wrapping + proxy = await BucketStorage.local( + self.system, bucket_id=bucket_id, storage_path=bucket_storage_path, batch_size=batch_size, backend=backend or self.default_backend, backend_options=backend_options, + name=actor_name, + public=True, ) - self._buckets[key] = await self.system.spawn( - storage, name=actor_name, public=True - ) + self._buckets[key] = proxy.ref logger.info(f"Created bucket: {actor_name} at {bucket_storage_path}") return self._buckets[key] @@ -181,10 +185,11 @@ async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: # Lazy import to avoid circular dependency from pulsing.topic.broker import TopicBroker - broker = TopicBroker(topic_name, self.system) - self._topics[topic_name] = await self.system.spawn( - broker, name=actor_name, public=True + # Use TopicBroker.local() to create properly wrapped actor + proxy = await TopicBroker.local( + self.system, topic_name, self.system, name=actor_name, public=True ) + self._topics[topic_name] = proxy.ref logger.info(f"Created topic broker: {actor_name}") return self._topics[topic_name] @@ -395,10 +400,11 @@ async def get_bucket_ref( backend: str | type | None = None, backend_options: dict | None = None, max_redirects: int = 3, -) -> ActorRef: - """Get ActorRef for specified bucket +) -> "ActorProxy": + """Get ActorProxy for specified bucket Automatically handles redirects to ensure getting the bucket on the correct node. + Returns ActorProxy for direct method calls on BucketStorage. Args: system: Actor system @@ -410,8 +416,6 @@ async def get_bucket_ref( backend_options: Additional backend options (optional) max_redirects: Maximum redirect count """ - from pulsing.actor import ActorId, NodeId - # Request from local StorageManager first manager = await get_storage_manager(system) @@ -438,14 +442,10 @@ async def get_bucket_ref( msg_type = response.msg_type or resp_data.get("_type", "") if msg_type == "BucketReady": - # Successfully got bucket - actor_id = resp_data["actor_id"] - # actor_id is now a UUID (u128), transmitted as string - if isinstance(actor_id, str): - actor_id = int(actor_id) - - bucket_actor_id = ActorId(actor_id) - return await system.actor_ref(bucket_actor_id) + # Successfully got bucket - resolve by actor name for typed proxy + actor_name = f"bucket_{topic}_{bucket_id}" + # Use BucketStorage.resolve to get typed ActorProxy + return await BucketStorage.resolve(actor_name, system=system) elif msg_type == "Redirect": # Need to redirect to other node diff --git a/python/pulsing/queue/queue.py b/python/pulsing/queue/queue.py index 4026ae911..8f801bd01 100644 --- a/python/pulsing/queue/queue.py +++ b/python/pulsing/queue/queue.py @@ -8,7 +8,8 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import ActorRef, ActorSystem, Message +from pulsing.actor import ActorSystem +from pulsing.actor.remote import ActorProxy from .manager import get_bucket_ref, get_storage_manager @@ -57,8 +58,8 @@ def __init__( self.backend = backend self.backend_options = backend_options - # Actor references for each bucket - self._bucket_refs: dict[int, ActorRef] = {} + # Actor proxies for each bucket + self._bucket_refs: dict[int, ActorProxy] = {} self._init_lock = asyncio.Lock() # Save event loop reference (for sync wrapper) @@ -74,7 +75,7 @@ def _hash_partition(self, value: Any) -> int: hash_value = int(hashlib.md5(str(value).encode()).hexdigest(), 16) return hash_value % self.num_buckets - async def _ensure_bucket(self, bucket_id: int) -> ActorRef: + async def _ensure_bucket(self, bucket_id: int) -> ActorProxy: """Ensure Actor for specified bucket is created Get bucket reference through StorageManager: @@ -122,12 +123,10 @@ async def put( raise ValueError(f"Missing partition column '{self.bucket_column}'") bucket_id = self._hash_partition(rec[self.bucket_column]) - bucket_ref = await self._ensure_bucket(bucket_id) - - response = await bucket_ref.ask(Message.from_json("Put", {"record": rec})) - if response.msg_type == "Error": - raise RuntimeError(f"Put failed: {response.to_json().get('error')}") + bucket = await self._ensure_bucket(bucket_id) + # Direct method call via proxy + await bucket.put(rec) results.append({"bucket_id": bucket_id, "status": "ok"}) return results[0] if single else results @@ -165,44 +164,28 @@ async def _get_from_bucket( timeout: float | None, ) -> list[dict[str, Any]]: """Read data from specified bucket""" - bucket_ref = await self._ensure_bucket(bucket_id) - - # Use streaming read - response = await bucket_ref.ask( - Message.from_json( - "GetStream", - {"limit": limit, "offset": offset, "wait": wait, "timeout": timeout}, - ) - ) + bucket = await self._ensure_bucket(bucket_id) - if response.msg_type == "Error": - raise RuntimeError(f"Get failed: {response.to_json().get('error')}") - - if not response.is_stream: + # Try streaming read first via proxy + try: + records = [] + async for batch in bucket.get_stream(limit, offset, wait, timeout): + for record in batch: + records.append(record) + if len(records) >= limit: + return records + return records + except Exception: # Fallback to non-streaming - response = await bucket_ref.ask( - Message.from_json("Get", {"limit": limit, "offset": offset}) - ) - return response.to_json().get("records", []) - - records = [] - reader = response.stream_reader() - async for chunk in reader: - for record in chunk.get("records", []): - records.append(record) - if len(records) >= limit: - return records - - return records + return await bucket.get(limit, offset) async def flush(self) -> None: """Flush all bucket buffers""" tasks = [] for bucket_id in range(self.num_buckets): if bucket_id in self._bucket_refs: - tasks.append( - self._bucket_refs[bucket_id].ask(Message.from_json("Flush", {})) - ) + # Direct method call via proxy + tasks.append(self._bucket_refs[bucket_id].flush()) if tasks: await asyncio.gather(*tasks) @@ -211,10 +194,8 @@ async def stats(self) -> dict[str, Any]: bucket_stats = {} for bucket_id in range(self.num_buckets): if bucket_id in self._bucket_refs: - response = await self._bucket_refs[bucket_id].ask( - Message.from_json("Stats", {}) - ) - bucket_stats[bucket_id] = response.to_json() + # Direct method call via proxy + bucket_stats[bucket_id] = await self._bucket_refs[bucket_id].stats() return { "topic": self.topic, @@ -456,11 +437,16 @@ async def read_queue( # Try to resolve existing bucket Actors if assigned_buckets: + from .storage import BucketStorage + for bid in assigned_buckets: # Must match `StorageManager` bucket actor naming: "bucket_{topic}_{bucket_id}" actor_name = f"bucket_{topic}_{bid}" try: - queue._bucket_refs[bid] = await system.resolve_named(actor_name) + # Use BucketStorage.resolve to get typed ActorProxy + queue._bucket_refs[bid] = await BucketStorage.resolve( + actor_name, system=system + ) except Exception: pass diff --git a/python/pulsing/queue/storage.py b/python/pulsing/queue/storage.py index 98682f772..25caf1e75 100644 --- a/python/pulsing/queue/storage.py +++ b/python/pulsing/queue/storage.py @@ -2,16 +2,17 @@ import asyncio import logging -from typing import Any +from typing import Any, AsyncIterator -from pulsing.actor import Actor, ActorId, Message, StreamMessage +from pulsing.actor import ActorId, StreamMessage, remote from .backend import StorageBackend, get_backend_class logger = logging.getLogger(__name__) -class BucketStorage(Actor): +@remote +class BucketStorage: """Storage Actor for a Single Bucket Uses pluggable StorageBackend for data storage. @@ -61,64 +62,82 @@ def on_start(self, actor_id: ActorId) -> None: def on_stop(self) -> None: logger.info(f"BucketStorage[{self.bucket_id}] stopping") - async def receive(self, msg: Message) -> Message | StreamMessage | None: - msg_type = msg.msg_type - data = msg.to_json() - - if msg_type == "Put": - record = data.get("record") - if not record: - return Message.from_json("Error", {"error": "Missing 'record'"}) - - await self._backend.put(record) - return Message.from_json("PutResponse", {"status": "ok"}) - - elif msg_type == "PutBatch": - records = data.get("records") - if not records: - return Message.from_json("Error", {"error": "Missing 'records'"}) - - await self._backend.put_batch(records) - return Message.from_json( - "PutBatchResponse", {"status": "ok", "count": len(records)} - ) - - elif msg_type == "Get": - limit = data.get("limit", 100) - offset = data.get("offset", 0) - records = await self._backend.get(limit, offset) - return Message.from_json("GetResponse", {"records": records}) - - elif msg_type == "GetStream": - limit = data.get("limit", 100) - offset = data.get("offset", 0) - wait: bool = data.get("wait", False) - timeout: float | None = data.get("timeout", None) - - stream_msg, writer = StreamMessage.create("GetStream") - - async def produce(): - try: - async for records in self._backend.get_stream( - limit, offset, wait, timeout - ): - await writer.write({"records": records}) - writer.close() - except Exception as e: - logger.error(f"BucketStorage[{self.bucket_id}] stream error: {e}") - await writer.error(str(e)) - writer.close() - - asyncio.create_task(produce()) - return stream_msg - - elif msg_type == "Flush": - await self._backend.flush() - return Message.from_json("FlushResponse", {"status": "ok"}) - - elif msg_type == "Stats": - stats = await self._backend.stats() - return Message.from_json("StatsResponse", stats) - - else: - return Message.from_json("Error", {"error": f"Unknown: {msg_type}"}) + # ========== Public Remote Methods ========== + + async def put(self, record: dict) -> dict: + """Put a single record. + + Args: + record: Record to store + + Returns: + {"status": "ok"} + """ + if not record: + raise ValueError("Missing 'record'") + await self._backend.put(record) + return {"status": "ok"} + + async def put_batch(self, records: list[dict]) -> dict: + """Put multiple records. + + Args: + records: List of records to store + + Returns: + {"status": "ok", "count": N} + """ + if not records: + raise ValueError("Missing 'records'") + await self._backend.put_batch(records) + return {"status": "ok", "count": len(records)} + + async def get(self, limit: int = 100, offset: int = 0) -> list[dict]: + """Get records. + + Args: + limit: Maximum number of records to return + offset: Starting offset + + Returns: + List of records + """ + return await self._backend.get(limit, offset) + + async def get_stream( + self, + limit: int = 100, + offset: int = 0, + wait: bool = False, + timeout: float | None = None, + ) -> AsyncIterator[list[dict]]: + """Get records as a stream. + + Args: + limit: Maximum number of records to return + offset: Starting offset + wait: Whether to wait for new records + timeout: Timeout in seconds + + Yields: + Batches of records + """ + async for records in self._backend.get_stream(limit, offset, wait, timeout): + yield records + + async def flush(self) -> dict: + """Flush pending writes. + + Returns: + {"status": "ok"} + """ + await self._backend.flush() + return {"status": "ok"} + + async def stats(self) -> dict: + """Get storage statistics. + + Returns: + Statistics dict from backend + """ + return await self._backend.stats() diff --git a/python/pulsing/topic/__init__.py b/python/pulsing/topic/__init__.py index 06fc57ad1..759aab6dd 100644 --- a/python/pulsing/topic/__init__.py +++ b/python/pulsing/topic/__init__.py @@ -26,12 +26,14 @@ async def handle(msg): TopicReader, TopicWriter, read_topic, + subscribe_to_topic, write_topic, ) __all__ = [ "write_topic", "read_topic", + "subscribe_to_topic", "TopicWriter", "TopicReader", "PublishMode", diff --git a/python/pulsing/topic/broker.py b/python/pulsing/topic/broker.py index 59d20d1e2..ffbf41a54 100644 --- a/python/pulsing/topic/broker.py +++ b/python/pulsing/topic/broker.py @@ -6,12 +6,12 @@ import logging import time from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pulsing.actor import ActorRef, ActorSystem -from pulsing.actor import Actor, ActorId, Message +from pulsing.actor import ActorId, remote logger = logging.getLogger(__name__) @@ -35,8 +35,9 @@ class _Subscriber: consecutive_failures: int = 0 -class TopicBroker(Actor): - """Topic broker actor.""" +@remote +class TopicBroker: + """Topic broker actor with remote method support.""" def __init__(self, topic: str, system: "ActorSystem"): self.topic = topic @@ -60,44 +61,30 @@ def metadata(self) -> dict[str, str]: "subscriber_count": str(len(self._subscribers)), } - async def receive(self, msg: Message) -> Message | None: - try: - return await self._handle(msg) - except Exception as e: - logger.exception(f"TopicBroker[{self.topic}] error: {e}") - return Message.from_json("Error", {"error": str(e)}) - - async def _handle(self, msg: Message) -> Message | None: - data = msg.to_json() - - if msg.msg_type == "Subscribe": - return await self._subscribe(data) - elif msg.msg_type == "Unsubscribe": - return await self._unsubscribe(data) - elif msg.msg_type == "Publish": - return await self._publish(data) - elif msg.msg_type == "GetStats": - return self._stats() - else: - return Message.from_json("Error", {"error": f"Unknown: {msg.msg_type}"}) - - async def _subscribe(self, data: dict) -> Message: - subscriber_id = data.get("subscriber_id") - actor_name = data.get("actor_name") - node_id_raw = data.get("node_id") - # node_id comes as string from JSON, convert to int for resolve_named - node_id = int(node_id_raw) if node_id_raw else None + # ========== Public Remote Methods ========== + async def subscribe( + self, + subscriber_id: str, + actor_name: str, + node_id: int | None = None, + ) -> dict: + """Subscribe an actor to this topic. + + Args: + subscriber_id: Unique subscriber identifier + actor_name: Name of the actor to receive messages + node_id: Optional node ID (for cross-node subscriptions) + + Returns: + {"success": True, "topic": "..."} + """ if not subscriber_id or not actor_name: - return Message.from_json( - "Error", {"error": "Missing subscriber_id or actor_name"} - ) + raise ValueError("Missing subscriber_id or actor_name") async with self._lock: if subscriber_id in self._subscribers: - return Message.from_json( - "SubscribeResult", {"success": True, "already": True} - ) + return {"success": True, "already": True} self._subscribers[subscriber_id] = _Subscriber( subscriber_id=subscriber_id, @@ -105,52 +92,49 @@ async def _subscribe(self, data: dict) -> Message: node_id=node_id, ) logger.debug(f"TopicBroker[{self.topic}] +subscriber: {subscriber_id}") - return Message.from_json( - "SubscribeResult", {"success": True, "topic": self.topic} - ) + return {"success": True, "topic": self.topic} + + async def unsubscribe(self, subscriber_id: str) -> dict: + """Unsubscribe from this topic. - async def _unsubscribe(self, data: dict) -> Message: - subscriber_id = data.get("subscriber_id") + Args: + subscriber_id: Subscriber ID to remove + + Returns: + {"success": True/False} + """ if not subscriber_id: - return Message.from_json("Error", {"error": "Missing subscriber_id"}) + raise ValueError("Missing subscriber_id") async with self._lock: if subscriber_id in self._subscribers: del self._subscribers[subscriber_id] logger.debug(f"TopicBroker[{self.topic}] -subscriber: {subscriber_id}") - return Message.from_json("UnsubscribeResult", {"success": True}) - return Message.from_json("UnsubscribeResult", {"success": False}) - - async def _resolve(self, sub: _Subscriber) -> "ActorRef | None": - now = time.time() - - if sub._ref is not None and (now - sub._ref_resolved_at) < REF_TTL_SECONDS: - return sub._ref - - try: - sub._ref = await self.system.resolve_named( - sub.actor_name, node_id=sub.node_id - ) - sub._ref_resolved_at = now - return sub._ref - except Exception as e: - logger.warning(f"Failed to resolve {sub.subscriber_id}: {e}") - sub._ref = None - sub._ref_resolved_at = 0 - return None - - async def _publish(self, data: dict) -> Message: - payload = data.get("payload") - mode = data.get("mode", "fire_and_forget") - sender_id = data.get("sender_id") + return {"success": True} + return {"success": False} + async def publish( + self, + payload: Any, + mode: str = "fire_and_forget", + sender_id: str | None = None, + timeout: float = DEFAULT_FANOUT_TIMEOUT, + ) -> dict: + """Publish a message to all subscribers. + + Args: + payload: Message payload + mode: "fire_and_forget", "wait_all_acks", "wait_any_ack", "best_effort" + sender_id: Optional sender ID (excluded from delivery) + timeout: Timeout for ack modes + + Returns: + {"success": True, "delivered": N, "failed": N, "subscriber_count": N} + """ self._total_published += 1 if not self._subscribers: - return Message.from_json( - "PublishResult", - {"success": True, "delivered": 0, "failed": 0, "subscriber_count": 0}, - ) + return {"success": True, "delivered": 0, "failed": 0, "subscriber_count": 0} envelope = { "topic": self.topic, @@ -162,13 +146,51 @@ async def _publish(self, data: dict) -> Message: if mode == "fire_and_forget": return await self._fanout_tell(envelope, sender_id) elif mode == "wait_all_acks": - return await self._fanout_ask(envelope, sender_id, wait_all=True) + return await self._fanout_ask( + envelope, sender_id, wait_all=True, timeout=timeout + ) elif mode == "wait_any_ack": - return await self._fanout_ask(envelope, sender_id, wait_all=False) + return await self._fanout_ask( + envelope, sender_id, wait_all=False, timeout=timeout + ) elif mode == "best_effort": return await self._fanout_best_effort(envelope, sender_id) else: - return Message.from_json("Error", {"error": f"Unknown mode: {mode}"}) + raise ValueError(f"Unknown mode: {mode}") + + def get_stats(self) -> dict: + """Get topic statistics. + + Returns: + {"topic": "...", "subscriber_count": N, "total_published": N, ...} + """ + return { + "topic": self.topic, + "subscriber_count": len(self._subscribers), + "total_published": self._total_published, + "total_delivered": self._total_delivered, + "total_failed": self._total_failed, + } + + # ========== Internal Methods ========== + + async def _resolve(self, sub: _Subscriber) -> "ActorRef | None": + now = time.time() + + if sub._ref is not None and (now - sub._ref_resolved_at) < REF_TTL_SECONDS: + return sub._ref + + try: + sub._ref = await self.system.resolve_named( + sub.actor_name, node_id=sub.node_id + ) + sub._ref_resolved_at = now + return sub._ref + except Exception as e: + logger.warning(f"Failed to resolve {sub.subscriber_id}: {e}") + sub._ref = None + sub._ref_resolved_at = 0 + return None def _record_success(self, sub: _Subscriber) -> None: sub.messages_delivered += 1 @@ -190,7 +212,7 @@ async def _evict_zombies(self, zombie_ids: list[str]) -> None: f"TopicBroker[{self.topic}] evicted zombie subscriber: {sub_id}" ) - async def _fanout_tell(self, envelope: dict, sender_id: str | None) -> Message: + async def _fanout_tell(self, envelope: dict, sender_id: str | None) -> dict: sent = 0 failed = 0 zombies: list[str] = [] @@ -218,15 +240,12 @@ async def _fanout_tell(self, envelope: dict, sender_id: str | None) -> Message: self._total_delivered += sent self._total_failed += failed - return Message.from_json( - "PublishResult", - { - "success": True, - "delivered": sent, - "failed": failed, - "subscriber_count": len(self._subscribers), - }, - ) + return { + "success": True, + "delivered": sent, + "failed": failed, + "subscriber_count": len(self._subscribers), + } async def _fanout_ask( self, @@ -234,7 +253,7 @@ async def _fanout_ask( sender_id: str | None, wait_all: bool, timeout: float = DEFAULT_FANOUT_TIMEOUT, - ) -> Message: + ) -> dict: """Wait for ack mode.""" tasks = [] sub_ids = [] @@ -253,10 +272,7 @@ async def _fanout_ask( if not tasks: await self._evict_zombies(resolve_failed) - return Message.from_json( - "PublishResult", - {"success": True, "delivered": 0, "failed": 0, "subscriber_count": 0}, - ) + return {"success": True, "delivered": 0, "failed": 0, "subscriber_count": 0} delivered = 0 failed = 0 @@ -304,39 +320,31 @@ async def _fanout_ask( if not task.exception(): delivered = 1 break - # Cancel other pending tasks (local cancellation, remote relies on RST_STREAM) + # Cancel other pending tasks for task in pending: task.cancel() except asyncio.TimeoutError: - # Timeout: no response logger.warning( f"TopicBroker[{self.topic}] wait_any_ack timeout after {timeout}s" ) - # Cancel all tasks for task in tasks: if not task.done(): task.cancel() - # Evict zombie subscribers await self._evict_zombies(zombies) self._total_delivered += delivered self._total_failed += failed - return Message.from_json( - "PublishResult", - { - "success": delivered > 0 or failed == 0, - "delivered": delivered, - "failed": failed, - "failed_subscribers": failed_ids, - "subscriber_count": len(self._subscribers), - }, - ) - - async def _fanout_best_effort( - self, envelope: dict, sender_id: str | None - ) -> Message: + return { + "success": delivered > 0 or failed == 0, + "delivered": delivered, + "failed": failed, + "failed_subscribers": failed_ids, + "subscriber_count": len(self._subscribers), + } + + async def _fanout_best_effort(self, envelope: dict, sender_id: str | None) -> dict: """Best-effort: try to send, record failures""" delivered = 0 failed = 0 @@ -363,31 +371,15 @@ async def _fanout_best_effort( if self._record_failure(sub): zombies.append(sub_id) - # Evict zombie subscribers await self._evict_zombies(zombies) self._total_delivered += delivered self._total_failed += failed - return Message.from_json( - "PublishResult", - { - "success": True, - "delivered": delivered, - "failed": failed, - "failed_subscribers": failed_ids, - "subscriber_count": len(self._subscribers), - }, - ) - - def _stats(self) -> Message: - return Message.from_json( - "TopicStats", - { - "topic": self.topic, - "subscriber_count": len(self._subscribers), - "total_published": self._total_published, - "total_delivered": self._total_delivered, - "total_failed": self._total_failed, - }, - ) + return { + "success": True, + "delivered": delivered, + "failed": failed, + "failed_subscribers": failed_ids, + "subscriber_count": len(self._subscribers), + } diff --git a/python/pulsing/topic/topic.py b/python/pulsing/topic/topic.py index ba1bf5335..4959900af 100644 --- a/python/pulsing/topic/topic.py +++ b/python/pulsing/topic/topic.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: from pulsing.actor import ActorRef + from pulsing.actor.remote import ActorProxy from pulsing.actor import Actor, ActorId, ActorSystem, Message @@ -44,11 +45,48 @@ class PublishResult: MessageCallback = Callable[[Any], Coroutine[Any, Any, Any] | Any] -async def _get_broker(system: ActorSystem, topic: str) -> "ActorRef": - """Get topic broker (reuses queue/manager infrastructure)""" +async def _get_broker(system: ActorSystem, topic: str) -> "ActorProxy": + """Get topic broker proxy (reuses queue/manager infrastructure)""" + from pulsing.actor.remote import ActorProxy from pulsing.queue.manager import get_topic_broker - return await get_topic_broker(system, topic) + broker_ref = await get_topic_broker(system, topic) + # Wrap ActorRef with ActorProxy to enable direct method calls + return ActorProxy.from_ref( + broker_ref, + methods=["subscribe", "unsubscribe", "publish", "get_stats"], + async_methods={"subscribe", "unsubscribe", "publish"}, + ) + + +async def subscribe_to_topic( + system: ActorSystem, + topic: str, + subscriber_id: str, + actor_name: str, + node_id: int | None = None, +) -> dict: + """Subscribe an actor to a topic. + + This is a helper function for manually registering subscribers with a topic broker. + For normal usage, prefer using TopicReader which handles this automatically. + + Args: + system: ActorSystem instance + topic: Topic name + subscriber_id: Unique subscriber identifier + actor_name: Name of the actor to receive messages + node_id: Optional node ID (defaults to local node) + + Returns: + Response dict from broker + + Raises: + RuntimeError: If subscription fails + """ + broker = await _get_broker(system, topic) + # Direct method call on broker proxy + return await broker.subscribe(subscriber_id, actor_name, node_id) class TopicWriter: @@ -58,7 +96,7 @@ def __init__(self, system: ActorSystem, topic: str, writer_id: str | None = None self._system = system self._topic = topic self._writer_id = writer_id or f"writer_{uuid.uuid4().hex[:8]}" - self._broker: "ActorRef | None" = None + self._broker: "ActorProxy | None" = None @property def topic(self) -> str: @@ -68,7 +106,7 @@ def topic(self) -> str: def writer_id(self) -> str: return self._writer_id - async def _broker_ref(self) -> "ActorRef": + async def _broker_ref(self) -> "ActorProxy": if self._broker is None: self._broker = await _get_broker(self._system, self._topic) return self._broker @@ -101,23 +139,16 @@ async def publish( effective_timeout = timeout if timeout is not None else DEFAULT_PUBLISH_TIMEOUT async def _do_publish(): - return await broker.ask( - Message.from_json( - "Publish", - { - "payload": message, - "mode": mode.value, - "sender_id": self._writer_id, - }, - ) + # Direct method call on broker proxy + return await broker.publish( + message, + mode=mode.value, + sender_id=self._writer_id, + timeout=effective_timeout, ) - response = await asyncio.wait_for(_do_publish(), timeout=effective_timeout) + data = await asyncio.wait_for(_do_publish(), timeout=effective_timeout) - if response.msg_type == "Error": - raise RuntimeError(response.to_json().get("error")) - - data = response.to_json() return PublishResult( success=data.get("success", False), delivered=data.get("delivered", 0), @@ -129,8 +160,8 @@ async def _do_publish(): async def stats(self) -> dict[str, Any]: """Get topic statistics""" broker = await self._broker_ref() - response = await broker.ask(Message.from_json("GetStats", {})) - return response.to_json() + # Direct method call on broker proxy + return await broker.get_stats() class _SubscriberActor(Actor): @@ -237,23 +268,14 @@ async def start(self) -> None: subscriber, name=actor_name, public=True ) - # Register with broker + # Register with broker using direct method call broker = await _get_broker(self._system, self._topic) - response = await broker.ask( - Message.from_json( - "Subscribe", - { - "subscriber_id": self._reader_id, - "actor_name": actor_name, - # Use string for JSON serialization of large u128 integers - "node_id": str(self._system.node_id.id), - }, - ) + await broker.subscribe( + self._reader_id, + actor_name, + node_id=self._system.node_id.id, ) - if response.msg_type == "Error": - raise RuntimeError(f"Subscribe failed: {response.to_json().get('error')}") - self._started = True logger.debug(f"TopicReader[{self._reader_id}] started for topic: {self._topic}") @@ -262,12 +284,10 @@ async def stop(self) -> None: if not self._started: return - # Unsubscribe from broker + # Unsubscribe from broker using direct method call try: broker = await _get_broker(self._system, self._topic) - await broker.ask( - Message.from_json("Unsubscribe", {"subscriber_id": self._reader_id}) - ) + await broker.unsubscribe(self._reader_id) except Exception as e: logger.warning(f"Unsubscribe error: {e}") @@ -286,8 +306,8 @@ async def stop(self) -> None: async def stats(self) -> dict[str, Any]: """Get topic statistics""" broker = await _get_broker(self._system, self._topic) - response = await broker.ask(Message.from_json("GetStats", {})) - return response.to_json() + # Direct method call on broker proxy + return await broker.get_stats() async def write_topic( diff --git a/tests/python/test_queue.py b/tests/python/test_queue.py index 5946f7a63..4433f4824 100644 --- a/tests/python/test_queue.py +++ b/tests/python/test_queue.py @@ -946,43 +946,86 @@ async def test_data_integrity_under_stress(actor_system, temp_storage_path): @pytest.mark.asyncio async def test_bucket_storage_direct(actor_system, temp_storage_path): - """Test BucketStorage actor directly with memory backend.""" - storage = BucketStorage( + """Test BucketStorage actor directly with memory backend via proxy.""" + # Use BucketStorage.local() to create properly wrapped actor with proxy + bucket = await BucketStorage.local( + actor_system, bucket_id=0, storage_path=f"{temp_storage_path}/direct_bucket", batch_size=5, backend="memory", + name="test_bucket", ) - # Spawn actor - actor_ref = await actor_system.spawn(storage, name="test_bucket") - - from pulsing.actor import Message - - # Put records + # Put records via proxy method for i in range(10): - response = await actor_ref.ask( - Message.from_json("Put", {"record": {"id": f"test_{i}", "value": i}}) - ) - assert response.to_json().get("status") == "ok" + result = await bucket.put({"id": f"test_{i}", "value": i}) + assert result["status"] == "ok" - # Get stats - stats_response = await actor_ref.ask(Message.from_json("Stats", {})) - stats = stats_response.to_json() + # Get stats via proxy method + stats = await bucket.stats() assert stats["bucket_id"] == 0 assert stats["total_count"] == 10 assert stats["backend"] == "memory" # Flush (no-op for memory backend) - await actor_ref.ask(Message.from_json("Flush", {})) + await bucket.flush() # Data should still be there - stats_response = await actor_ref.ask(Message.from_json("Stats", {})) - stats = stats_response.to_json() + stats = await bucket.stats() assert stats["total_count"] == 10 +@pytest.mark.asyncio +async def test_bucket_storage_get(actor_system, temp_storage_path): + """Test BucketStorage get method via proxy.""" + bucket = await BucketStorage.local( + actor_system, + bucket_id=0, + storage_path=f"{temp_storage_path}/get_bucket", + batch_size=5, + backend="memory", + name="test_bucket_get", + ) + + # Put records + for i in range(10): + await bucket.put({"id": f"test_{i}", "value": i}) + + # Get records via proxy + records = await bucket.get(limit=10, offset=0) + assert len(records) == 10 + + # Get with limit + records = await bucket.get(limit=5) + assert len(records) == 5 + + +@pytest.mark.asyncio +async def test_bucket_storage_put_batch(actor_system, temp_storage_path): + """Test BucketStorage put_batch method via proxy.""" + bucket = await BucketStorage.local( + actor_system, + bucket_id=0, + storage_path=f"{temp_storage_path}/batch_bucket", + batch_size=100, + backend="memory", + name="test_bucket_batch", + ) + + # Put batch of records + records = [{"id": f"batch_{i}", "value": i * 10} for i in range(20)] + result = await bucket.put_batch(records) + + assert result["status"] == "ok" + assert result["count"] == 20 + + # Verify via stats + stats = await bucket.stats() + assert stats["total_count"] == 20 + + # ============================================================================ # Sync Queue Tests # ============================================================================ diff --git a/tests/python/test_queue_backends.py b/tests/python/test_queue_backends.py index 45ab2e72e..9d9b5ce85 100644 --- a/tests/python/test_queue_backends.py +++ b/tests/python/test_queue_backends.py @@ -249,28 +249,24 @@ class TestBucketStorageWithBackend: async def test_bucket_storage_with_memory_backend( self, actor_system, temp_storage_path ): - """Test BucketStorage with memory backend.""" - from pulsing.actor import Message - - storage = BucketStorage( + """Test BucketStorage with memory backend via proxy.""" + # Use BucketStorage.local() for proper @remote wrapping + bucket = await BucketStorage.local( + actor_system, bucket_id=0, storage_path=f"{temp_storage_path}/bucket_memory", batch_size=10, backend="memory", + name="bucket_memory_test", ) - actor_ref = await actor_system.spawn(storage, name="bucket_memory_test") - - # Put records + # Put records via proxy method for i in range(5): - response = await actor_ref.ask( - Message.from_json("Put", {"record": {"id": f"test_{i}", "value": i}}) - ) - assert response.to_json().get("status") == "ok" + result = await bucket.put({"id": f"test_{i}", "value": i}) + assert result["status"] == "ok" - # Get stats - stats_response = await actor_ref.ask(Message.from_json("Stats", {})) - stats = stats_response.to_json() + # Get stats via proxy method + stats = await bucket.stats() assert stats["bucket_id"] == 0 assert stats["total_count"] == 5 @@ -278,30 +274,25 @@ async def test_bucket_storage_with_memory_backend( @pytest.mark.asyncio async def test_bucket_storage_put_batch(self, actor_system, temp_storage_path): - """Test BucketStorage PutBatch message.""" - from pulsing.actor import Message - - storage = BucketStorage( + """Test BucketStorage put_batch method via proxy.""" + # Use BucketStorage.local() for proper @remote wrapping + bucket = await BucketStorage.local( + actor_system, bucket_id=0, storage_path=f"{temp_storage_path}/bucket_batch", batch_size=100, backend="memory", + name="bucket_batch_test", ) - actor_ref = await actor_system.spawn(storage, name="bucket_batch_test") - - # Put batch + # Put batch via proxy method records = [{"id": f"batch_{i}", "value": i} for i in range(10)] - response = await actor_ref.ask( - Message.from_json("PutBatch", {"records": records}) - ) - result = response.to_json() - assert result.get("status") == "ok" - assert result.get("count") == 10 + result = await bucket.put_batch(records) + assert result["status"] == "ok" + assert result["count"] == 10 - # Verify - stats_response = await actor_ref.ask(Message.from_json("Stats", {})) - stats = stats_response.to_json() + # Verify via stats + stats = await bucket.stats() assert stats["total_count"] == 10 @@ -472,8 +463,7 @@ def total_count(self) -> int: async def test_custom_backend_with_bucket_storage( self, actor_system, temp_storage_path ): - """Test custom backend with BucketStorage actor.""" - from pulsing.actor import Message + """Test custom backend with BucketStorage actor via proxy.""" class TrackingBackend: """Backend that tracks all operations.""" @@ -530,24 +520,24 @@ def total_count(self) -> int: # Register and use register_backend("tracking", TrackingBackend) - storage = BucketStorage( + # Use BucketStorage.local() for proper @remote wrapping + bucket = await BucketStorage.local( + actor_system, bucket_id=0, storage_path=f"{temp_storage_path}/tracking_test", batch_size=100, backend="tracking", + name="tracking_bucket", ) - actor_ref = await actor_system.spawn(storage, name="tracking_bucket") - - # Perform operations - await actor_ref.ask(Message.from_json("Put", {"record": {"id": "1"}})) - await actor_ref.ask(Message.from_json("Put", {"record": {"id": "2"}})) - await actor_ref.ask(Message.from_json("Get", {"limit": 10, "offset": 0})) - await actor_ref.ask(Message.from_json("Flush", {})) + # Perform operations via proxy methods + await bucket.put({"id": "1"}) + await bucket.put({"id": "2"}) + await bucket.get(limit=10, offset=0) + await bucket.flush() # Check tracking - stats_response = await actor_ref.ask(Message.from_json("Stats", {})) - stats = stats_response.to_json() + stats = await bucket.stats() assert stats["backend"] == "tracking" assert "put" in stats["operations"] diff --git a/tests/python/test_system_actor.py b/tests/python/test_system_actor.py index 12e5cb44e..527bdaf05 100644 --- a/tests/python/test_system_actor.py +++ b/tests/python/test_system_actor.py @@ -2,23 +2,16 @@ Tests for SystemActor functionality. Covers: -- Rust SystemActor (system/core) operations -- Python ActorService (_python_actor_service) operations -- System helper functions (list_actors, get_metrics, etc.) +- Rust SystemActor (system/core) operations via SystemActorProxy +- Python ActorService (system/python_actor_service) operations via PythonActorServiceProxy """ import asyncio import pytest import pulsing as pul from pulsing.actor import ( - Actor, - ActorId, - Message, - list_actors, - get_metrics, - get_node_info, - health_check, - ping, + get_python_actor_service, + get_system_actor, remote, ) @@ -36,6 +29,18 @@ async def system(): await system.shutdown() +@pytest.fixture +async def sys_proxy(system): + """Create a SystemActorProxy for the test system.""" + return await get_system_actor(system) + + +@pytest.fixture +async def service_proxy(system): + """Create a PythonActorServiceProxy for the test system.""" + return await get_python_actor_service(system) + + # ============================================================================ # Test: System Auto-Registration # ============================================================================ @@ -58,27 +63,28 @@ async def test_python_actor_service_auto_registered(system): # ============================================================================ -# Test: SystemActor Reference +# Test: SystemActorProxy # ============================================================================ @pytest.mark.asyncio -async def test_get_system_actor_reference(system): - """Should be able to get SystemActor reference.""" - sys_ref = await system.system() - assert sys_ref is not None - assert sys_ref.is_local() +async def test_get_system_actor_proxy(system): + """Should be able to get SystemActorProxy.""" + sys_proxy = await get_system_actor(system) + assert sys_proxy is not None + assert sys_proxy.ref is not None + assert sys_proxy.ref.is_local() # ============================================================================ -# Test: Ping +# Test: Ping via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_ping_local(system): - """Ping should return Pong with node info.""" - result = await ping(system) +async def test_ping_via_proxy(sys_proxy, system): + """Ping via SystemActorProxy should return Pong with node info.""" + result = await sys_proxy.ping() assert result["type"] == "Pong" assert "node_id" in result @@ -87,28 +93,15 @@ async def test_ping_local(system): assert int(result["node_id"]) == system.node_id.id -@pytest.mark.asyncio -async def test_ping_direct_message(system): - """Direct ping message to SystemActor.""" - sys_ref = await system.system() - msg = Message.from_json("SystemMessage", {"type": "Ping"}) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - assert data["type"] == "Pong" - # node_id is serialized as string in JSON for u128 precision - assert int(data["node_id"]) == system.node_id.id - - # ============================================================================ -# Test: Health Check +# Test: Health Check via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_health_check(system): - """Health check should return healthy status.""" - result = await health_check(system) +async def test_health_check_via_proxy(sys_proxy): + """Health check via SystemActorProxy should return healthy status.""" + result = await sys_proxy.health_check() assert result["type"] == "Health" assert result["status"] == "healthy" @@ -116,27 +109,15 @@ async def test_health_check(system): assert "uptime_secs" in result -@pytest.mark.asyncio -async def test_health_check_direct_message(system): - """Direct health check message to SystemActor.""" - sys_ref = await system.system() - msg = Message.from_json("SystemMessage", {"type": "HealthCheck"}) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - assert data["type"] == "Health" - assert data["status"] == "healthy" - - # ============================================================================ -# Test: Get Node Info +# Test: Get Node Info via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_get_node_info(system): - """Should return node information.""" - result = await get_node_info(system) +async def test_get_node_info_via_proxy(sys_proxy, system): + """Should return node information via SystemActorProxy.""" + result = await sys_proxy.get_node_info() assert result["type"] == "NodeInfo" # node_id is serialized as string in JSON for u128 precision @@ -146,9 +127,9 @@ async def test_get_node_info(system): @pytest.mark.asyncio -async def test_get_node_info_address_format(system): +async def test_get_node_info_address_format(sys_proxy): """Node address should be in IP:port format.""" - result = await get_node_info(system) + result = await sys_proxy.get_node_info() addr = result["addr"] # Should contain port separator @@ -156,14 +137,14 @@ async def test_get_node_info_address_format(system): # ============================================================================ -# Test: Get Metrics +# Test: Get Metrics via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_get_metrics(system): - """Should return system metrics.""" - result = await get_metrics(system) +async def test_get_metrics_via_proxy(sys_proxy): + """Should return system metrics via SystemActorProxy.""" + result = await sys_proxy.get_metrics() assert result["type"] == "Metrics" assert "actors_count" in result @@ -174,113 +155,61 @@ async def test_get_metrics(system): @pytest.mark.asyncio -async def test_metrics_message_count_increases(system): +async def test_metrics_message_count_increases(sys_proxy): """Message count should increase with each message.""" # Get initial count - result1 = await get_metrics(system) + result1 = await sys_proxy.get_metrics() initial_count = result1["messages_total"] # Send a few more messages - await ping(system) - await ping(system) + await sys_proxy.ping() + await sys_proxy.ping() # Get new count - result2 = await get_metrics(system) + result2 = await sys_proxy.get_metrics() new_count = result2["messages_total"] assert new_count > initial_count # ============================================================================ -# Test: List Actors +# Test: List Actors via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_list_actors_empty_initially(system): +async def test_list_actors_via_proxy(sys_proxy): """Actor list should be empty initially (only system actors).""" - result = await list_actors(system) + result = await sys_proxy.list_actors() # Should be empty or only contain system actors assert isinstance(result, list) -@pytest.mark.asyncio -async def test_list_actors_direct_message(system): - """Direct ListActors message to SystemActor.""" - sys_ref = await system.system() - msg = Message.from_json("SystemMessage", {"type": "ListActors"}) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - assert data["type"] == "ActorList" - assert "actors" in data - - # ============================================================================ -# Test: GetActor +# Test: PythonActorServiceProxy # ============================================================================ @pytest.mark.asyncio -async def test_get_actor_not_found(system): - """GetActor should return error for non-existent actor.""" - sys_ref = await system.system() - msg = Message.from_json( - "SystemMessage", {"type": "GetActor", "name": "nonexistent"} - ) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - assert data["type"] == "Error" - assert "not found" in data["message"].lower() - - -# ============================================================================ -# Test: CreateActor (should fail in pure Rust mode) -# ============================================================================ +async def test_get_python_actor_service_proxy(system): + """Should be able to get PythonActorServiceProxy.""" + service_proxy = await get_python_actor_service(system) + assert service_proxy is not None + assert service_proxy.ref is not None @pytest.mark.asyncio -async def test_create_actor_not_supported_in_rust(system): - """CreateActor should return error in pure Rust SystemActor.""" - sys_ref = await system.system() - msg = Message.from_json( - "SystemMessage", - { - "type": "CreateActor", - "actor_type": "Counter", - "name": "test_counter", - "params": {}, - "public": True, - }, - ) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - assert data["type"] == "Error" - assert "not supported" in data["message"].lower() +async def test_list_registry_via_proxy(service_proxy): + """PythonActorServiceProxy should list registered actor classes.""" + classes = await service_proxy.list_registry() - -# ============================================================================ -# Test: PythonActorService -# ============================================================================ - - -@pytest.mark.asyncio -async def test_python_actor_service_list_registry(system): - """PythonActorService should list registered actor classes.""" - service_ref = await system.resolve_named("system/python_actor_service") - msg = Message.from_json("ListRegistry", {}) - resp = await service_ref.ask(msg) - data = resp.to_json() - - assert data.get("classes") is not None - assert isinstance(data["classes"], list) + assert classes is not None + assert isinstance(classes, list) # ============================================================================ -# Test: @remote with PythonActorService +# Test: @remote with PythonActorServiceProxy # ============================================================================ @@ -313,27 +242,23 @@ async def test_remote_local_creation(system): @pytest.mark.asyncio -async def test_remote_class_registered(system): +async def test_remote_class_registered(service_proxy): """@remote decorated class should be registered in global registry.""" - service_ref = await system.resolve_named("system/python_actor_service") - msg = Message.from_json("ListRegistry", {}) - resp = await service_ref.ask(msg) - data = resp.to_json() + classes = await service_proxy.list_registry() # TestCounter should be in the registry - class_names = data.get("classes", []) - assert any("TestCounter" in name for name in class_names) + assert any("TestCounter" in name for name in classes) # ============================================================================ -# Test: Multiple Concurrent Requests +# Test: Multiple Concurrent Requests via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_concurrent_ping_requests(system): - """SystemActor should handle concurrent requests.""" - tasks = [ping(system) for _ in range(10)] +async def test_concurrent_ping_requests(sys_proxy, system): + """SystemActor should handle concurrent requests via proxy.""" + tasks = [sys_proxy.ping() for _ in range(10)] results = await asyncio.gather(*tasks) for result in results: @@ -343,14 +268,14 @@ async def test_concurrent_ping_requests(system): @pytest.mark.asyncio -async def test_concurrent_mixed_requests(system): - """SystemActor should handle mixed concurrent requests.""" +async def test_concurrent_mixed_requests(sys_proxy): + """SystemActor should handle mixed concurrent requests via proxy.""" tasks = [ - ping(system), - health_check(system), - get_node_info(system), - get_metrics(system), - list_actors(system), + sys_proxy.ping(), + sys_proxy.health_check(), + sys_proxy.get_node_info(), + sys_proxy.get_metrics(), + sys_proxy.list_actors(), ] results = await asyncio.gather(*tasks) @@ -362,49 +287,50 @@ async def test_concurrent_mixed_requests(system): # ============================================================================ -# Test: Error Handling +# Test: Uptime via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_invalid_message_type(system): - """SystemActor should handle invalid message types gracefully.""" - sys_ref = await system.system() - msg = Message.from_json("SystemMessage", {"type": "InvalidType"}) - resp = await sys_ref.ask(msg) - data = resp.to_json() - - # Should return error for unknown message type - assert data["type"] == "Error" +async def test_uptime_increases(sys_proxy): + """Uptime should increase over time.""" + result1 = await sys_proxy.get_node_info() + uptime1 = result1["uptime_secs"] + await asyncio.sleep(1.1) -@pytest.mark.asyncio -async def test_malformed_message(system): - """SystemActor should handle malformed messages gracefully.""" - sys_ref = await system.system() - # Send a message without proper format - msg = Message.from_json("BadMessage", {"foo": "bar"}) - resp = await sys_ref.ask(msg) - data = resp.to_json() + result2 = await sys_proxy.get_node_info() + uptime2 = result2["uptime_secs"] - # Should return error - assert data["type"] == "Error" + assert uptime2 >= uptime1 # ============================================================================ -# Test: Uptime +# Test: Remote Node Access via Proxy # ============================================================================ @pytest.mark.asyncio -async def test_uptime_increases(system): - """Uptime should increase over time.""" - result1 = await get_node_info(system) - uptime1 = result1["uptime_secs"] +async def test_get_system_actor_for_remote_node(system): + """get_system_actor with node_id should work (for cluster scenarios).""" + # For local testing, use local node's ID + local_node_id = system.node_id.id - await asyncio.sleep(1.1) + # This should work even with local node_id + sys_proxy = await get_system_actor(system, node_id=local_node_id) + result = await sys_proxy.ping() - result2 = await get_node_info(system) - uptime2 = result2["uptime_secs"] + assert result["type"] == "Pong" - assert uptime2 >= uptime1 + +@pytest.mark.asyncio +async def test_get_python_actor_service_for_remote_node(system): + """get_python_actor_service with node_id should work (for cluster scenarios).""" + # For local testing, use local node's ID + local_node_id = system.node_id.id + + # This should work even with local node_id + service_proxy = await get_python_actor_service(system, node_id=local_node_id) + classes = await service_proxy.list_registry() + + assert isinstance(classes, list) diff --git a/tests/python/test_topic.py b/tests/python/test_topic.py index 431350611..d52cc24c7 100644 --- a/tests/python/test_topic.py +++ b/tests/python/test_topic.py @@ -894,20 +894,11 @@ async def receive(self, msg): actor_name = "_topic_sub_timeout_error_topic_slow_sub" await actor_system.spawn(slow_actor, name=actor_name, public=True) - # Register with broker - from pulsing.queue.manager import get_topic_broker - from pulsing.actor import Message + # Register with broker using helper function + from pulsing.topic import subscribe_to_topic - broker = await get_topic_broker(actor_system, "timeout_error_topic") - await broker.ask( - Message.from_json( - "Subscribe", - { - "subscriber_id": "slow_sub", - "actor_name": actor_name, - "node_id": actor_system.node_id.id, - }, - ) + await subscribe_to_topic( + actor_system, "timeout_error_topic", "slow_sub", actor_name ) # Publish with very short timeout - should timeout @@ -1024,8 +1015,7 @@ async def test_subscriber_failure_threshold_eviction(actor_system): Verify P0-3 fix: Subscribers are automatically evicted after 3 consecutive failures. """ - from pulsing.actor import Actor, ActorId, Message - from pulsing.queue.manager import get_topic_broker + from pulsing.actor import Actor, ActorId from pulsing.topic.broker import MAX_CONSECUTIVE_FAILURES # Verify configuration constants @@ -1048,17 +1038,11 @@ async def receive(self, msg): actor_name = "_topic_sub_eviction_test_topic_failing" await actor_system.spawn(failing_actor, name=actor_name, public=True) - # Register failing subscriber with broker - broker = await get_topic_broker(actor_system, "eviction_test_topic") - await broker.ask( - Message.from_json( - "Subscribe", - { - "subscriber_id": "failing_sub", - "actor_name": actor_name, - "node_id": actor_system.node_id.id, - }, - ) + # Register failing subscriber with broker using helper function + from pulsing.topic import subscribe_to_topic + + await subscribe_to_topic( + actor_system, "eviction_test_topic", "failing_sub", actor_name ) # Get initial statistics From 7316616cd6137a1f9f5f9e4aafd347efce43d78e Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 22:21:11 +0800 Subject: [PATCH 6/7] Refactor StorageManager to utilize remote methods and improve message handling - Converted StorageManager from an Actor to a remote class, enabling direct method calls. - Replaced message-based communication with explicit method calls for bucket and topic management. - Updated related methods to enhance clarity and maintainability, including get_bucket, get_topic, list_buckets, and get_stats. - Adjusted tests to validate the new proxy method interactions, ensuring proper functionality across scenarios. --- python/pulsing/queue/manager.py | 371 +++++++++++++++----------------- python/pulsing/topic/topic.py | 10 +- tests/python/test_topic.py | 16 +- 3 files changed, 178 insertions(+), 219 deletions(-) diff --git a/python/pulsing/queue/manager.py b/python/pulsing/queue/manager.py index b0cbf8a95..023277301 100644 --- a/python/pulsing/queue/manager.py +++ b/python/pulsing/queue/manager.py @@ -5,7 +5,7 @@ import logging from typing import TYPE_CHECKING, Any -from pulsing.actor import Actor, ActorId, ActorRef, ActorSystem, Message +from pulsing.actor import ActorId, ActorRef, ActorSystem, remote from .storage import BucketStorage @@ -60,7 +60,8 @@ def _compute_owner(bucket_key: str, nodes: list[dict]) -> int | None: return best_node_id -class StorageManager(Actor): +@remote +class StorageManager: """Storage manager Actor One instance per node, responsible for: @@ -194,188 +195,169 @@ async def _get_or_create_topic_broker(self, topic_name: str) -> ActorRef: return self._topics[topic_name] - async def receive(self, msg: Message) -> Message | None: - try: - return await self._handle_message(msg) - except Exception as e: - logger.exception(f"Error handling message: {e}") - return Message.from_json("Error", {"error": str(e)}) - - async def _handle_message(self, msg: Message) -> Message | None: - msg_type = msg.msg_type - data = msg.to_json() - - if msg_type == "GetBucket": - # Request bucket reference - topic = data.get("topic") - bucket_id = data.get("bucket_id") - batch_size = data.get("batch_size", 100) - storage_path = data.get("storage_path") # Optional custom storage path - backend = data.get("backend") # Optional backend name - backend_options = data.get("backend_options") # Optional backend options - - if topic is None or bucket_id is None: - return Message.from_json( - "Error", {"error": "Missing 'topic' or 'bucket_id'"} - ) - - # Compute owner - bucket_key = self._bucket_key(topic, bucket_id) - members = await self._refresh_members() - owner_node_id = _compute_owner(bucket_key, members) - # node_id.id returns u128, convert to string for comparison with members - local_node_id = str(self.system.node_id.id) - - # Determine if belongs to this node (string comparison) - if owner_node_id is None or owner_node_id == local_node_id: - # This node is responsible, create/return bucket - bucket_ref = await self._get_or_create_bucket( - topic, bucket_id, batch_size, storage_path, backend, backend_options - ) - return Message.from_json( - "BucketReady", - { - "_type": "BucketReady", # Fallback: msg_type may be lost across nodes - "topic": topic, - "bucket_id": bucket_id, - # Use string for JSON serialization of large u128 integers - "actor_id": str(bucket_ref.actor_id.id), - "node_id": str(local_node_id), - }, - ) - else: - # Not owned by this node, return redirect - # Find owner node address - owner_addr = None - for m in members: - m_node_id = m.get("node_id") - if m_node_id is not None and m_node_id == owner_node_id: - owner_addr = m.get("addr") - break - - return Message.from_json( - "Redirect", - { - "_type": "Redirect", # Fallback: msg_type may be lost across nodes - "topic": topic, - "bucket_id": bucket_id, - # Use string for JSON serialization of large integers - "owner_node_id": str(owner_node_id), - "owner_addr": owner_addr, - }, - ) - - elif msg_type == "GetTopic": - # Request topic broker reference - topic_name = data.get("topic") - if not topic_name: - return Message.from_json("Error", {"error": "Missing 'topic'"}) - - # Compute owner - topic_key = self._topic_key(topic_name) - members = await self._refresh_members() - owner_node_id = _compute_owner(topic_key, members) - # node_id.id returns u128, convert to string for comparison with members - local_node_id = str(self.system.node_id.id) - - if owner_node_id is None or owner_node_id == local_node_id: - # This node is responsible, create/return topic broker - broker_ref = await self._get_or_create_topic_broker(topic_name) - return Message.from_json( - "TopicReady", - { - "_type": "TopicReady", - "topic": topic_name, - # Use string for JSON serialization of large u128 integers - "actor_id": str(broker_ref.actor_id.id), - "node_id": str(local_node_id), - }, - ) - else: - # Not owned by this node, return redirect - owner_addr = None - for m in members: - m_node_id = m.get("node_id") - if m_node_id is not None and m_node_id == owner_node_id: - owner_addr = m.get("addr") - break - - return Message.from_json( - "Redirect", - { - "_type": "Redirect", - "topic": topic_name, - # Use string for JSON serialization of large integers - "owner_node_id": str(owner_node_id), - "owner_addr": owner_addr, - }, - ) + # ========== Public Remote Methods ========== - elif msg_type == "ListBuckets": - # List all buckets managed by this node - buckets = [ - {"topic": topic, "bucket_id": bid} - for (topic, bid) in self._buckets.keys() - ] - return Message.from_json("BucketList", {"buckets": buckets}) - - elif msg_type == "ListTopics": - # List all topics managed by this node - return Message.from_json("TopicList", {"topics": list(self._topics.keys())}) - - elif msg_type == "GetStats": - # Get statistics - return Message.from_json( - "Stats", - { - # Use string for JSON serialization of large u128 integers - "node_id": str(self.system.node_id.id), - "bucket_count": len(self._buckets), - "topic_count": len(self._topics), - "buckets": [ - {"topic": t, "bucket_id": b} for (t, b) in self._buckets.keys() - ], - "topics": list(self._topics.keys()), - }, + async def get_bucket( + self, + topic: str, + bucket_id: int, + batch_size: int = 100, + storage_path: str | None = None, + backend: str | None = None, + backend_options: dict | None = None, + ) -> dict: + """Get bucket reference. + + Returns: + - {"_type": "BucketReady", "topic": ..., "bucket_id": ..., "actor_id": ..., "node_id": ...} + - {"_type": "Redirect", "topic": ..., "bucket_id": ..., "owner_node_id": ..., "owner_addr": ...} + """ + # Compute owner + bucket_key = self._bucket_key(topic, bucket_id) + members = await self._refresh_members() + owner_node_id = _compute_owner(bucket_key, members) + local_node_id = str(self.system.node_id.id) + + if owner_node_id is None or owner_node_id == local_node_id: + # This node is responsible, create/return bucket + bucket_ref = await self._get_or_create_bucket( + topic, bucket_id, batch_size, storage_path, backend, backend_options ) + return { + "_type": "BucketReady", + "topic": topic, + "bucket_id": bucket_id, + "actor_id": str(bucket_ref.actor_id.id), + "node_id": str(local_node_id), + } + else: + # Not owned by this node, return redirect + owner_addr = None + for m in members: + m_node_id = m.get("node_id") + if m_node_id is not None and m_node_id == owner_node_id: + owner_addr = m.get("addr") + break + return { + "_type": "Redirect", + "topic": topic, + "bucket_id": bucket_id, + "owner_node_id": str(owner_node_id), + "owner_addr": owner_addr, + } + + async def get_topic(self, topic: str) -> dict: + """Get topic broker reference. + + Returns: + - {"_type": "TopicReady", "topic": ..., "actor_id": ..., "node_id": ...} + - {"_type": "Redirect", "topic": ..., "owner_node_id": ..., "owner_addr": ...} + """ + # Compute owner + topic_key = self._topic_key(topic) + members = await self._refresh_members() + owner_node_id = _compute_owner(topic_key, members) + local_node_id = str(self.system.node_id.id) + + if owner_node_id is None or owner_node_id == local_node_id: + # This node is responsible, create/return topic broker + broker_ref = await self._get_or_create_topic_broker(topic) + return { + "_type": "TopicReady", + "topic": topic, + "actor_id": str(broker_ref.actor_id.id), + "node_id": str(local_node_id), + } else: - return Message.from_json( - "Error", {"error": f"Unknown message type: {msg_type}"} - ) + # Not owned by this node, return redirect + owner_addr = None + for m in members: + m_node_id = m.get("node_id") + if m_node_id is not None and m_node_id == owner_node_id: + owner_addr = m.get("addr") + break + + return { + "_type": "Redirect", + "topic": topic, + "owner_node_id": str(owner_node_id), + "owner_addr": owner_addr, + } + + async def list_buckets(self) -> list[dict]: + """List all buckets managed by this node. + + Returns: + List of {"topic": ..., "bucket_id": ...} + """ + return [ + {"topic": topic, "bucket_id": bid} for (topic, bid) in self._buckets.keys() + ] + + async def list_topics(self) -> list[str]: + """List all topics managed by this node. + + Returns: + List of topic names + """ + return list(self._topics.keys()) + + async def get_stats(self) -> dict: + """Get storage manager statistics. + + Returns: + {"node_id": ..., "bucket_count": ..., "topic_count": ..., "buckets": [...], "topics": [...]} + """ + return { + "node_id": str(self.system.node_id.id), + "bucket_count": len(self._buckets), + "topic_count": len(self._topics), + "buckets": [ + {"topic": t, "bucket_id": b} for (t, b) in self._buckets.keys() + ], + "topics": list(self._topics.keys()), + } # Lock to prevent concurrent creation of StorageManager _manager_lock = asyncio.Lock() -async def get_storage_manager(system: ActorSystem) -> ActorRef: - """Get StorageManager for this node, create if not exists""" +async def get_storage_manager(system: ActorSystem) -> "ActorProxy": + """Get StorageManager proxy for this node, create if not exists. + + Returns: + ActorProxy for direct method calls on StorageManager + """ local_node_id = system.node_id.id # Try to resolve local node's StorageManager try: - return await system.resolve_named(STORAGE_MANAGER_NAME, node_id=local_node_id) + return await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=local_node_id + ) except Exception: pass async with _manager_lock: # Check local node again try: - return await system.resolve_named( - STORAGE_MANAGER_NAME, node_id=local_node_id + return await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=local_node_id ) except Exception: pass - # Create new StorageManager + # Create new StorageManager using .local() try: - manager = StorageManager(system) - return await system.spawn(manager, name=STORAGE_MANAGER_NAME, public=True) + return await StorageManager.local( + system, system, name=STORAGE_MANAGER_NAME, public=True + ) except Exception as e: if "already exists" in str(e).lower(): - return await system.resolve_named( - STORAGE_MANAGER_NAME, node_id=local_node_id + return await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=local_node_id ) raise @@ -419,27 +401,23 @@ async def get_bucket_ref( # Request from local StorageManager first manager = await get_storage_manager(system) - for redirect_count in range(max_redirects + 1): - msg_data = { - "topic": topic, - "bucket_id": bucket_id, - "batch_size": batch_size, - } - if storage_path: - msg_data["storage_path"] = storage_path - if backend: - # If it's a class, pass class name (classes cannot be serialized across nodes) - msg_data["backend"] = ( - backend if isinstance(backend, str) else backend.__name__ - ) - if backend_options: - msg_data["backend_options"] = backend_options + # Convert backend class to name if needed + backend_name = None + if backend: + backend_name = backend if isinstance(backend, str) else backend.__name__ - response = await manager.ask(Message.from_json("GetBucket", msg_data)) - - resp_data = response.to_json() - # msg_type may be lost across nodes, use _type field as fallback - msg_type = response.msg_type or resp_data.get("_type", "") + for redirect_count in range(max_redirects + 1): + # Call manager.get_bucket() via proxy + resp_data = await manager.get_bucket( + topic=topic, + bucket_id=bucket_id, + batch_size=batch_size, + storage_path=storage_path, + backend=backend_name, + backend_options=backend_options, + ) + + msg_type = resp_data.get("_type", "") if msg_type == "BucketReady": # Successfully got bucket - resolve by actor name for typed proxy @@ -471,8 +449,8 @@ async def get_bucket_ref( max_resolve_retries = 10 for resolve_retry in range(max_resolve_retries): try: - manager = await system.resolve_named( - STORAGE_MANAGER_NAME, node_id=owner_node_id + manager = await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=owner_node_id ) break except Exception as e: @@ -488,9 +466,6 @@ async def get_bucket_ref( f"{max_resolve_retries} retries: {e}" ) from e - elif msg_type == "Error": - raise RuntimeError(f"GetBucket failed: {resp_data.get('error')}") - else: raise RuntimeError(f"Unexpected response: {msg_type}") @@ -501,33 +476,30 @@ async def get_topic_broker( system: ActorSystem, topic: str, max_redirects: int = 3, -) -> ActorRef: - """Get broker ActorRef for specified topic +) -> "ActorProxy": + """Get broker ActorProxy for specified topic Automatically handles redirects to ensure getting the broker on the correct node. + Returns ActorProxy for direct method calls on TopicBroker. Args: system: Actor system topic: Topic name max_redirects: Maximum redirect count """ - from pulsing.actor import ActorId, NodeId + from pulsing.topic.broker import TopicBroker manager = await get_storage_manager(system) for redirect_count in range(max_redirects + 1): - response = await manager.ask(Message.from_json("GetTopic", {"topic": topic})) - - resp_data = response.to_json() - msg_type = response.msg_type or resp_data.get("_type", "") + # Call manager.get_topic() via proxy + resp_data = await manager.get_topic(topic=topic) + msg_type = resp_data.get("_type", "") if msg_type == "TopicReady": - actor_id = resp_data["actor_id"] - # actor_id is now a UUID (u128), transmitted as string - if isinstance(actor_id, str): - actor_id = int(actor_id) - broker_actor_id = ActorId(actor_id) - return await system.actor_ref(broker_actor_id) + # Successfully got topic - resolve by actor name for typed proxy + actor_name = f"_topic_broker_{topic}" + return await TopicBroker.resolve(actor_name, system=system) elif msg_type == "Redirect": # owner_node_id transmitted as string, convert to int @@ -541,11 +513,11 @@ async def get_topic_broker( if owner_node_id == system.node_id.id: raise RuntimeError(f"Redirect loop for topic: {topic}") - # Get owner node's StorageManager + # Get owner node's StorageManager via proxy for retry in range(10): try: - manager = await system.resolve_named( - STORAGE_MANAGER_NAME, node_id=owner_node_id + manager = await StorageManager.resolve( + STORAGE_MANAGER_NAME, system=system, node_id=owner_node_id ) break except Exception as e: @@ -556,9 +528,6 @@ async def get_topic_broker( f"StorageManager not found on node {owner_node_id}: {e}" ) from e - elif msg_type == "Error": - raise RuntimeError(f"GetTopic failed: {resp_data.get('error')}") - else: raise RuntimeError(f"Unexpected response: {msg_type}") diff --git a/python/pulsing/topic/topic.py b/python/pulsing/topic/topic.py index 4959900af..caeea056c 100644 --- a/python/pulsing/topic/topic.py +++ b/python/pulsing/topic/topic.py @@ -47,16 +47,10 @@ class PublishResult: async def _get_broker(system: ActorSystem, topic: str) -> "ActorProxy": """Get topic broker proxy (reuses queue/manager infrastructure)""" - from pulsing.actor.remote import ActorProxy from pulsing.queue.manager import get_topic_broker - broker_ref = await get_topic_broker(system, topic) - # Wrap ActorRef with ActorProxy to enable direct method calls - return ActorProxy.from_ref( - broker_ref, - methods=["subscribe", "unsubscribe", "publish", "get_stats"], - async_methods={"subscribe", "unsubscribe", "publish"}, - ) + # get_topic_broker already returns ActorProxy (via TopicBroker.resolve) + return await get_topic_broker(system, topic) async def subscribe_to_topic( diff --git a/tests/python/test_topic.py b/tests/python/test_topic.py index d52cc24c7..2871eaf2d 100644 --- a/tests/python/test_topic.py +++ b/tests/python/test_topic.py @@ -729,7 +729,6 @@ async def test_double_start_stop(actor_system): async def test_topic_broker_via_storage_manager(actor_system): """Test that topic broker is created via StorageManager.""" from pulsing.queue.manager import get_storage_manager - from pulsing.actor import Message # Ensure StorageManager exists manager = await get_storage_manager(actor_system) @@ -738,9 +737,8 @@ async def test_topic_broker_via_storage_manager(actor_system): writer = await write_topic(actor_system, "sm_integration_topic") await writer.publish({"test": True}) - # Check stats include topics - response = await manager.ask(Message.from_json("GetStats", {})) - stats = response.to_json() + # Check stats include topics via proxy method + stats = await manager.get_stats() assert "topic_count" in stats assert stats["topic_count"] >= 1 @@ -751,7 +749,6 @@ async def test_topic_broker_via_storage_manager(actor_system): async def test_list_topics(actor_system): """Test listing topics via StorageManager.""" from pulsing.queue.manager import get_storage_manager - from pulsing.actor import Message # Create some topics await write_topic(actor_system, "list_topic_1") @@ -763,13 +760,12 @@ async def test_list_topics(actor_system): await w1.publish({"test": 1}) await w2.publish({"test": 2}) + # List topics via proxy method manager = await get_storage_manager(actor_system) - response = await manager.ask(Message.from_json("ListTopics", {})) - data = response.to_json() + topics = await manager.list_topics() - assert "topics" in data - assert "list_topic_1" in data["topics"] - assert "list_topic_2" in data["topics"] + assert "list_topic_1" in topics + assert "list_topic_2" in topics # ============================================================================ From b4c2266da85124d53012c0082ff428d1f03f5c97 Mon Sep 17 00:00:00 2001 From: Reiase Date: Sun, 25 Jan 2026 23:09:28 +0800 Subject: [PATCH 7/7] Refactor error handling and enhance error types in Pulsing actor system - Unified error handling by introducing a clear hierarchy of error types, including `PulsingError`, `RuntimeError`, and `ActorError`. - Updated the `PulsingError` enum to categorize errors into `RuntimeError` for framework-level issues and `ActorError` for user-defined execution errors. - Refactored error conversion logic to ensure seamless integration between Rust and Python, allowing for better error classification and handling. - Enhanced error messages for clarity and consistency across the system, improving debugging and user feedback. - Updated tests to validate the new error handling mechanisms and ensure proper functionality across various scenarios. --- .../pulsing-actor/src/behavior/reference.rs | 11 +- crates/pulsing-actor/src/error.rs | 571 ++++++++++++------ crates/pulsing-actor/src/system/handler.rs | 5 +- crates/pulsing-actor/src/system/resolve.rs | 14 +- crates/pulsing-actor/src/system/spawn.rs | 5 +- .../src/transport/http2/client.rs | 36 +- .../pulsing-actor/src/transport/http2/mod.rs | 33 +- crates/pulsing-py/src/actor.rs | 56 +- crates/pulsing-py/src/errors.rs | 62 ++ crates/pulsing-py/src/lib.rs | 5 + .../pulsing-py/src/python_error_converter.rs | 132 ++++ python/pulsing/__init__.py | 20 + python/pulsing/actor/__init__.py | 17 +- python/pulsing/actor/remote.py | 113 +++- python/pulsing/exceptions.py | 182 ++++++ tests/python/test_agent_runtime_lifecycle.py | 20 +- tests/python/test_remote_decorator.py | 8 +- 17 files changed, 1035 insertions(+), 255 deletions(-) create mode 100644 crates/pulsing-py/src/errors.rs create mode 100644 crates/pulsing-py/src/python_error_converter.rs create mode 100644 python/pulsing/exceptions.py diff --git a/crates/pulsing-actor/src/behavior/reference.rs b/crates/pulsing-actor/src/behavior/reference.rs index eb2f34bc2..2c38d091c 100644 --- a/crates/pulsing-actor/src/behavior/reference.rs +++ b/crates/pulsing-actor/src/behavior/reference.rs @@ -1,5 +1,6 @@ use crate::actor::ActorRef; use crate::actor::ActorSystemRef; +use crate::error::{PulsingError, RuntimeError}; use serde::{de::DeserializeOwned, Serialize}; use std::marker::PhantomData; use std::sync::Arc; @@ -72,9 +73,13 @@ where fn resolve(&self) -> anyhow::Result { match &self.mode { ResolutionMode::Direct(inner) => Ok(inner.clone()), - ResolutionMode::Dynamic(system) => system - .local_actor_ref_by_name(&self.name) - .ok_or_else(|| anyhow::anyhow!("Actor not found: {}", self.name)), + ResolutionMode::Dynamic(system) => { + system.local_actor_ref_by_name(&self.name).ok_or_else(|| { + anyhow::Error::from(PulsingError::from(RuntimeError::actor_not_found( + self.name.clone(), + ))) + }) + } } } diff --git a/crates/pulsing-actor/src/error.rs b/crates/pulsing-actor/src/error.rs index 0d247c8bb..eb9f2bee8 100644 --- a/crates/pulsing-actor/src/error.rs +++ b/crates/pulsing-actor/src/error.rs @@ -1,108 +1,112 @@ //! Unified error types for the actor system. +//! +//! Error hierarchy (matches Python exception structure): +//! - PulsingError: Top-level error enum +//! - RuntimeError: Framework/system-level errors +//! - Actor system errors (NotFound, Stopped, etc.) +//! - Transport errors (ConnectionFailed, etc.) +//! - Cluster errors (NodeNotFound, etc.) +//! - Config errors (InvalidValue, etc.) +//! - I/O errors, Serialization errors +//! → Maps to Python: PulsingRuntimeError +//! - ActorError: User Actor execution errors +//! - Business errors (user input errors) +//! - System errors (internal errors from user code) +//! - Timeout errors (operation timeouts) +//! - Unsupported errors (unsupported operations) +//! → Maps to Python: PulsingActorError (and subclasses) use thiserror::Error; /// Unified error type for the Pulsing actor system /// /// This enum encompasses all error categories in the system. -/// It implements `From` for each sub-error type for easy conversion. +/// Errors are divided into two main categories: +/// - RuntimeError: Framework/system-level errors +/// - ActorError: User Actor execution errors #[derive(Error, Debug)] pub enum PulsingError { - /// Actor-related errors + /// Runtime errors: Framework/system-level errors + #[error("Runtime error: {0}")] + Runtime(#[from] RuntimeError), + + /// Actor errors: User Actor execution errors #[error("Actor error: {0}")] Actor(#[from] ActorError), - - /// Transport layer errors - #[error("Transport error: {0}")] - Transport(#[from] TransportError), - - /// Cluster-related errors - #[error("Cluster error: {0}")] - Cluster(#[from] ClusterError), - - /// Configuration errors - #[error("Configuration error: {0}")] - Config(#[from] ConfigError), - - /// I/O errors - #[error("I/O error: {0}")] - Io(#[from] std::io::Error), - - /// Serialization/deserialization errors - #[error("Serialization error: {0}")] - Serialization(String), - - /// Timeout errors - #[error("Timeout: {0}")] - Timeout(String), - - /// Generic errors (for cases not covered by specific types) - #[error("{0}")] - Other(String), } impl PulsingError { - /// Create a generic error from a message - pub fn other(msg: impl Into) -> Self { - Self::Other(msg.into()) + /// Check if this is a runtime error + pub fn is_runtime(&self) -> bool { + matches!(self, Self::Runtime(_)) } - /// Create a timeout error - pub fn timeout(msg: impl Into) -> Self { - Self::Timeout(msg.into()) - } - - /// Create a serialization error - pub fn serialization(msg: impl Into) -> Self { - Self::Serialization(msg.into()) + /// Check if this is an actor error + pub fn is_actor(&self) -> bool { + matches!(self, Self::Actor(_)) } } impl From for PulsingError { fn from(err: anyhow::Error) -> Self { // Try to downcast to known error types + if let Some(runtime_err) = err.downcast_ref::() { + return Self::Runtime(runtime_err.clone()); + } if let Some(actor_err) = err.downcast_ref::() { return Self::Actor(actor_err.clone()); } - if let Some(transport_err) = err.downcast_ref::() { - return Self::Transport(transport_err.clone()); - } - if let Some(cluster_err) = err.downcast_ref::() { - return Self::Cluster(cluster_err.clone()); + // Try to downcast to PulsingError itself + if let Some(pulsing_err) = err.downcast_ref::() { + return pulsing_err.clone(); } - if let Some(config_err) = err.downcast_ref::() { - return Self::Config(config_err.clone()); + // Default to runtime error for unknown errors + Self::Runtime(RuntimeError::Other(err.to_string())) + } +} + +// Implement Clone for PulsingError to support downcast +impl Clone for PulsingError { + fn clone(&self) -> Self { + match self { + Self::Runtime(e) => Self::Runtime(e.clone()), + Self::Actor(e) => Self::Actor(e.clone()), } - Self::Other(err.to_string()) } } -/// Actor-related errors +/// Runtime errors: Framework/system-level errors +/// +/// These errors occur at the framework level and are not caused by user code. +/// Examples: transport failures, cluster issues, configuration errors, etc. #[derive(Error, Debug, Clone, PartialEq, Eq)] -pub enum ActorError { +pub enum RuntimeError { + // ========================================================================= + // Actor system errors (framework-level) + // ========================================================================= /// Actor not found by name or ID #[error("Actor not found: {name}")] - NotFound { name: String }, + ActorNotFound { name: String }, /// Actor already exists with the given name #[error("Actor already exists: {name}")] - AlreadyExists { name: String }, + ActorAlreadyExists { name: String }, /// Actor is not local to this node #[error("Actor is not local: {name}")] - NotLocal { name: String }, + ActorNotLocal { name: String }, /// Actor has stopped and cannot process messages #[error("Actor stopped: {name}")] - Stopped { name: String }, + ActorStopped { name: String }, /// Actor mailbox is full #[error("Actor mailbox full: {name}")] - MailboxFull { name: String }, + ActorMailboxFull { name: String }, /// Invalid actor path format #[error("Invalid actor path: {path}")] - InvalidPath { path: String }, + InvalidActorPath { path: String }, /// Message type mismatch #[error("Message type mismatch: expected {expected}, got {actual}")] @@ -110,41 +114,11 @@ pub enum ActorError { /// Actor spawn failed #[error("Failed to spawn actor: {reason}")] - SpawnFailed { reason: String }, -} - -impl ActorError { - /// Create a "not found" error - pub fn not_found(name: impl Into) -> Self { - Self::NotFound { name: name.into() } - } - - /// Create an "already exists" error - pub fn already_exists(name: impl Into) -> Self { - Self::AlreadyExists { name: name.into() } - } - - /// Create a "mailbox full" error - pub fn mailbox_full(name: impl Into) -> Self { - Self::MailboxFull { name: name.into() } - } - - /// Create an "invalid path" error - pub fn invalid_path(path: impl Into) -> Self { - Self::InvalidPath { path: path.into() } - } - - /// Create a "spawn failed" error - pub fn spawn_failed(reason: impl Into) -> Self { - Self::SpawnFailed { - reason: reason.into(), - } - } -} + ActorSpawnFailed { reason: String }, -/// Transport layer errors -#[derive(Error, Debug, Clone, PartialEq, Eq)] -pub enum TransportError { + // ========================================================================= + // Transport errors + // ========================================================================= /// Connection failed #[error("Connection failed to {addr}: {reason}")] ConnectionFailed { addr: String, reason: String }, @@ -168,36 +142,13 @@ pub enum TransportError { /// Protocol error (HTTP/2) #[error("Protocol error: {reason}")] ProtocolError { reason: String }, -} - -impl TransportError { - /// Create a connection failed error - pub fn connection_failed(addr: impl Into, reason: impl Into) -> Self { - Self::ConnectionFailed { - addr: addr.into(), - reason: reason.into(), - } - } - - /// Create a request timeout error - pub fn request_timeout(timeout_ms: u64) -> Self { - Self::RequestTimeout { timeout_ms } - } - - /// Create a TLS error - pub fn tls_error(reason: impl Into) -> Self { - Self::TlsError { - reason: reason.into(), - } - } -} -/// Cluster-related errors -#[derive(Error, Debug, Clone, PartialEq, Eq)] -pub enum ClusterError { + // ========================================================================= + // Cluster errors + // ========================================================================= /// Cluster not initialized #[error("Cluster not initialized")] - NotInitialized, + ClusterNotInitialized, /// Node not found in cluster #[error("Node not found: {node_id}")] @@ -218,12 +169,134 @@ pub enum ClusterError { /// Gossip protocol error #[error("Gossip error: {reason}")] GossipError { reason: String }, + + // ========================================================================= + // Configuration errors + // ========================================================================= + /// Invalid configuration value + #[error("Invalid configuration: {field} = {value} ({reason})")] + InvalidConfigValue { + field: String, + value: String, + reason: String, + }, + + /// Missing required configuration + #[error("Missing required configuration: {field}")] + MissingRequiredConfig { field: String }, + + /// Conflicting configuration options + #[error("Conflicting configuration: {reason}")] + ConflictingConfig { reason: String }, + + /// Address parsing error + #[error("Invalid address '{addr}': {reason}")] + InvalidAddress { addr: String, reason: String }, + + // ========================================================================= + // Other runtime errors + // ========================================================================= + /// I/O errors + #[error("I/O error: {0}")] + Io(String), + + /// Serialization/deserialization errors + #[error("Serialization error: {0}")] + Serialization(String), + + /// Generic runtime errors + #[error("{0}")] + Other(String), } -impl ClusterError { - /// Create a "not initialized" error - pub fn not_initialized() -> Self { - Self::NotInitialized +impl RuntimeError { + // ========================================================================= + // Actor system error constructors + // ========================================================================= + + /// Create an "actor not found" error + pub fn actor_not_found(name: impl Into) -> Self { + Self::ActorNotFound { name: name.into() } + } + + /// Create an "actor already exists" error + pub fn actor_already_exists(name: impl Into) -> Self { + Self::ActorAlreadyExists { name: name.into() } + } + + /// Create an "actor not local" error + pub fn actor_not_local(name: impl Into) -> Self { + Self::ActorNotLocal { name: name.into() } + } + + /// Create an "actor stopped" error + pub fn actor_stopped(name: impl Into) -> Self { + Self::ActorStopped { name: name.into() } + } + + /// Create an "actor mailbox full" error + pub fn actor_mailbox_full(name: impl Into) -> Self { + Self::ActorMailboxFull { name: name.into() } + } + + /// Create an "invalid actor path" error + pub fn invalid_actor_path(path: impl Into) -> Self { + Self::InvalidActorPath { path: path.into() } + } + + /// Create a "message type mismatch" error + pub fn message_type_mismatch(expected: impl Into, actual: impl Into) -> Self { + Self::MessageTypeMismatch { + expected: expected.into(), + actual: actual.into(), + } + } + + /// Create an "actor spawn failed" error + pub fn actor_spawn_failed(reason: impl Into) -> Self { + Self::ActorSpawnFailed { + reason: reason.into(), + } + } + + // ========================================================================= + // Transport error constructors + // ========================================================================= + + /// Create a connection failed error + pub fn connection_failed(addr: impl Into, reason: impl Into) -> Self { + Self::ConnectionFailed { + addr: addr.into(), + reason: reason.into(), + } + } + + /// Create a request timeout error + pub fn request_timeout(timeout_ms: u64) -> Self { + Self::RequestTimeout { timeout_ms } + } + + /// Create a TLS error + pub fn tls_error(reason: impl Into) -> Self { + Self::TlsError { + reason: reason.into(), + } + } + + /// Create a protocol error + pub fn protocol_error(reason: impl Into) -> Self { + Self::ProtocolError { + reason: reason.into(), + } + } + + // ========================================================================= + // Cluster error constructors + // ========================================================================= + + /// Create a "cluster not initialized" error + pub fn cluster_not_initialized() -> Self { + Self::ClusterNotInitialized } /// Create a "node not found" error @@ -242,56 +315,34 @@ impl ClusterError { pub fn no_healthy_instances(path: impl Into) -> Self { Self::NoHealthyInstances { path: path.into() } } -} - -/// Configuration-related errors -#[derive(Error, Debug, Clone, PartialEq, Eq)] -pub enum ConfigError { - /// Invalid configuration value - #[error("Invalid configuration: {field} = {value} ({reason})")] - InvalidValue { - field: String, - value: String, - reason: String, - }, - - /// Missing required configuration - #[error("Missing required configuration: {field}")] - MissingRequired { field: String }, - /// Conflicting configuration options - #[error("Conflicting configuration: {reason}")] - Conflicting { reason: String }, - - /// Address parsing error - #[error("Invalid address '{addr}': {reason}")] - InvalidAddress { addr: String, reason: String }, -} + // ========================================================================= + // Config error constructors + // ========================================================================= -impl ConfigError { - /// Create an "invalid value" error - pub fn invalid_value( + /// Create an "invalid config value" error + pub fn invalid_config_value( field: impl Into, value: impl Into, reason: impl Into, ) -> Self { - Self::InvalidValue { + Self::InvalidConfigValue { field: field.into(), value: value.into(), reason: reason.into(), } } - /// Create a "missing required" error - pub fn missing_required(field: impl Into) -> Self { - Self::MissingRequired { + /// Create a "missing required config" error + pub fn missing_required_config(field: impl Into) -> Self { + Self::MissingRequiredConfig { field: field.into(), } } - /// Create a "conflicting" error - pub fn conflicting(reason: impl Into) -> Self { - Self::Conflicting { + /// Create a "conflicting config" error + pub fn conflicting_config(reason: impl Into) -> Self { + Self::ConflictingConfig { reason: reason.into(), } } @@ -303,8 +354,145 @@ impl ConfigError { reason: reason.into(), } } + + // ========================================================================= + // Other error constructors + // ========================================================================= + + /// Create a serialization error + pub fn serialization(msg: impl Into) -> Self { + Self::Serialization(msg.into()) + } + + /// Create a generic runtime error + pub fn other(msg: impl Into) -> Self { + Self::Other(msg.into()) + } + + /// Create an I/O error from std::io::Error + pub fn io(err: std::io::Error) -> Self { + Self::Io(err.to_string()) + } +} + +impl From for RuntimeError { + fn from(err: std::io::Error) -> Self { + Self::Io(err.to_string()) + } +} + +/// Actor errors: User Actor execution errors +/// +/// These errors are raised by user code during Actor execution. +/// They are distinct from RuntimeError which are framework-level errors. +#[derive(Error, Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ActorError { + /// Business error: User input error, business logic error + /// These are recoverable and should be returned to the caller + #[error("Business error [{code}]: {message}")] + Business { + code: u32, + message: String, + #[serde(skip_serializing_if = "Option::is_none")] + details: Option, + }, + + /// System error: Internal error, resource error + /// May trigger Actor restart depending on recoverable flag + #[error("System error: {error}")] + System { error: String, recoverable: bool }, + + /// Timeout error: Operation timed out + /// Usually recoverable, can be retried + #[error("Timeout: operation '{operation}' timed out after {duration_ms}ms")] + Timeout { operation: String, duration_ms: u64 }, + + /// Unsupported operation + #[error("Unsupported operation: {operation}")] + Unsupported { operation: String }, +} + +impl ActorError { + /// Create a business error + pub fn business(code: u32, message: impl Into, details: Option) -> Self { + Self::Business { + code, + message: message.into(), + details, + } + } + + /// Create a system error + pub fn system(error: impl Into, recoverable: bool) -> Self { + Self::System { + error: error.into(), + recoverable, + } + } + + /// Create a timeout error + pub fn timeout(operation: impl Into, duration_ms: u64) -> Self { + Self::Timeout { + operation: operation.into(), + duration_ms, + } + } + + /// Create an unsupported operation error + pub fn unsupported(operation: impl Into) -> Self { + Self::Unsupported { + operation: operation.into(), + } + } + + /// Check if this error is recoverable + /// + /// - Business errors: always recoverable (return to caller) + /// - System errors: depends on recoverable flag + /// - Timeout errors: usually recoverable (can retry) + /// - Unsupported errors: not recoverable + pub fn is_recoverable(&self) -> bool { + match self { + Self::Business { .. } => true, + Self::System { recoverable, .. } => *recoverable, + Self::Timeout { .. } => true, + Self::Unsupported { .. } => false, + } + } + + /// Check if this is a business error + pub fn is_business(&self) -> bool { + matches!(self, Self::Business { .. }) + } + + /// Check if this is a system error + pub fn is_system(&self) -> bool { + matches!(self, Self::System { .. }) + } + + /// Check if this is a timeout error + pub fn is_timeout(&self) -> bool { + matches!(self, Self::Timeout { .. }) + } } +// ============================================================================= +// Legacy type aliases for backward compatibility +// ============================================================================= + +/// Legacy: TransportError (now part of RuntimeError) +#[deprecated(note = "Use RuntimeError instead")] +pub type TransportError = RuntimeError; + +/// Legacy: ClusterError (now part of RuntimeError) +#[deprecated(note = "Use RuntimeError instead")] +pub type ClusterError = RuntimeError; + +/// Legacy: ConfigError (now part of RuntimeError) +#[deprecated(note = "Use RuntimeError instead")] +pub type ConfigError = RuntimeError; + /// Convenience type alias for results using PulsingError pub type Result = std::result::Result; @@ -313,45 +501,37 @@ mod tests { use super::*; #[test] - fn test_actor_error_display() { - let err = ActorError::not_found("my-actor"); + fn test_runtime_error_display() { + let err = RuntimeError::actor_not_found("my-actor"); assert!(err.to_string().contains("my-actor")); - let err = ActorError::already_exists("existing-actor"); - assert!(err.to_string().contains("existing-actor")); - } - - #[test] - fn test_transport_error_display() { - let err = TransportError::connection_failed("127.0.0.1:8000", "connection refused"); + let err = RuntimeError::connection_failed("127.0.0.1:8000", "connection refused"); assert!(err.to_string().contains("127.0.0.1:8000")); assert!(err.to_string().contains("refused")); - - let err = TransportError::request_timeout(5000); - assert!(err.to_string().contains("5000")); } #[test] - fn test_cluster_error_display() { - let err = ClusterError::not_initialized(); - assert!(err.to_string().contains("not initialized")); + fn test_actor_error_display() { + let err = ActorError::business(400, "Invalid input", None); + assert!(err.to_string().contains("400")); + assert!(err.to_string().contains("Invalid input")); - let err = ClusterError::named_actor_not_found("services/echo"); - assert!(err.to_string().contains("services/echo")); + let err = ActorError::system("Database error", true); + assert!(err.to_string().contains("Database error")); } #[test] - fn test_config_error_display() { - let err = ConfigError::invalid_value("mailbox_capacity", "0", "must be > 0"); - assert!(err.to_string().contains("mailbox_capacity")); + fn test_pulsing_error_from_runtime_error() { + let runtime_err = RuntimeError::actor_not_found("test"); + let pulsing_err: PulsingError = runtime_err.into(); - let err = ConfigError::conflicting("cannot be both head node and worker"); - assert!(err.to_string().contains("head node")); + assert!(matches!(pulsing_err, PulsingError::Runtime(_))); + assert!(pulsing_err.to_string().contains("test")); } #[test] fn test_pulsing_error_from_actor_error() { - let actor_err = ActorError::not_found("test"); + let actor_err = ActorError::business(400, "test", None); let pulsing_err: PulsingError = actor_err.into(); assert!(matches!(pulsing_err, PulsingError::Actor(_))); @@ -359,28 +539,25 @@ mod tests { } #[test] - fn test_pulsing_error_from_transport_error() { - let transport_err = TransportError::request_timeout(3000); - let pulsing_err: PulsingError = transport_err.into(); - - assert!(matches!(pulsing_err, PulsingError::Transport(_))); - assert!(pulsing_err.to_string().contains("3000")); - } - - #[test] - fn test_pulsing_error_helpers() { - let err = PulsingError::other("something went wrong"); - assert!(err.to_string().contains("wrong")); - - let err = PulsingError::timeout("operation timed out"); - assert!(err.to_string().contains("timed out")); + fn test_error_classification() { + let business_err = ActorError::business(400, "test", None); + assert!(business_err.is_recoverable()); + assert!(business_err.is_business()); + + let system_err = ActorError::system("error", true); + assert!(system_err.is_recoverable()); + assert!(system_err.is_system()); + + let timeout_err = ActorError::timeout("op", 1000); + assert!(timeout_err.is_recoverable()); + assert!(timeout_err.is_timeout()); } #[test] fn test_error_equality() { - let err1 = ActorError::not_found("test"); - let err2 = ActorError::not_found("test"); - let err3 = ActorError::not_found("other"); + let err1 = ActorError::business(400, "test", None); + let err2 = ActorError::business(400, "test", None); + let err3 = ActorError::business(400, "other", None); assert_eq!(err1, err2); assert_ne!(err1, err3); diff --git a/crates/pulsing-actor/src/system/handler.rs b/crates/pulsing-actor/src/system/handler.rs index a1416008b..8fc4b148f 100644 --- a/crates/pulsing-actor/src/system/handler.rs +++ b/crates/pulsing-actor/src/system/handler.rs @@ -4,6 +4,7 @@ use super::handle::LocalActorHandle; use crate::actor::{ActorId, ActorPath, Envelope, Message, NodeId}; use crate::cluster::backends::{RegisterActorRequest, UnregisterActorRequest}; use crate::cluster::{GossipBackend, GossipMessage, HeadNodeBackend, NamingBackend}; +use crate::error::{PulsingError, RuntimeError}; use crate::metrics::{metrics, SystemMetrics as PrometheusMetrics}; use crate::transport::Http2ServerHandler; use dashmap::DashMap; @@ -59,7 +60,9 @@ impl SystemMessageHandler { } } - Err(anyhow::anyhow!("Actor not found: {}", actor_name)) + Err(anyhow::Error::from(PulsingError::from( + RuntimeError::actor_not_found(actor_name.to_string()), + ))) } /// Dispatch a message to an actor (ask pattern) diff --git a/crates/pulsing-actor/src/system/resolve.rs b/crates/pulsing-actor/src/system/resolve.rs index 8df676e60..cce4959a5 100644 --- a/crates/pulsing-actor/src/system/resolve.rs +++ b/crates/pulsing-actor/src/system/resolve.rs @@ -7,6 +7,7 @@ use crate::actor::{ ActorAddress, ActorId, ActorPath, ActorRef, ActorResolver, IntoActorPath, NodeId, }; use crate::cluster::{MemberInfo, MemberStatus, NamedActorInfo}; +use crate::error::{PulsingError, RuntimeError}; use crate::policies::LoadBalancingPolicy; use crate::system::config::ResolveOptions; use crate::system::load_balancer::{MemberWorker, NodeLoadTracker}; @@ -48,7 +49,9 @@ impl ActorSystem { return Ok(ActorRef::remote(*id, member_info.addr, Arc::new(transport))); } - Err(anyhow::anyhow!("Actor not found: {}", id)) + Err(anyhow::Error::from(PulsingError::from( + RuntimeError::actor_not_found(id.to_string()), + ))) } /// Resolve a named actor by path (direct resolution) @@ -157,10 +160,11 @@ impl ActorSystem { .ok_or_else(|| anyhow::anyhow!("Named actor not found locally"))? .clone(); - let local_id = self - .actor_names - .get(&actor_name) - .ok_or_else(|| anyhow::anyhow!("Actor not found: {}", actor_name))?; + let local_id = self.actor_names.get(&actor_name).ok_or_else(|| { + anyhow::Error::from(PulsingError::from(RuntimeError::actor_not_found( + actor_name.clone(), + ))) + })?; let handle = self .local_actors diff --git a/crates/pulsing-actor/src/system/spawn.rs b/crates/pulsing-actor/src/system/spawn.rs index 313021a54..14adee7f6 100644 --- a/crates/pulsing-actor/src/system/spawn.rs +++ b/crates/pulsing-actor/src/system/spawn.rs @@ -7,6 +7,7 @@ //! All other spawn methods delegate to the builder. use crate::actor::{Actor, ActorContext, ActorId, ActorPath, ActorRef, ActorSystemRef, Mailbox}; +use crate::error::{PulsingError, RuntimeError}; use crate::system::config::SpawnOptions; use crate::system::handle::{ActorStats, LocalActorHandle}; use crate::system::runtime::run_supervision_loop; @@ -33,7 +34,9 @@ impl ActorSystem { // Check for name conflicts (only for named actors) if let Some(ref name) = name_str { if self.actor_names.contains_key(name) { - return Err(anyhow::anyhow!("Actor already exists: {}", name)); + return Err(anyhow::Error::from(PulsingError::from( + RuntimeError::actor_already_exists(name.clone()), + ))); } if self.named_actor_paths.contains_key(name) { return Err(anyhow::anyhow!("Named path already registered: {}", name)); diff --git a/crates/pulsing-actor/src/transport/http2/client.rs b/crates/pulsing-actor/src/transport/http2/client.rs index 058fd0523..5940caa1a 100644 --- a/crates/pulsing-actor/src/transport/http2/client.rs +++ b/crates/pulsing-actor/src/transport/http2/client.rs @@ -6,6 +6,7 @@ use super::retry::{RetryConfig, RetryExecutor}; use super::stream::{BinaryFrameParser, StreamFrame, StreamHandle}; use super::{headers, MessageMode, RequestType}; use crate::actor::{Message, MessageStream}; +use crate::error::RuntimeError; use crate::tracing::{TraceContext, TRACEPARENT_HEADER}; use bytes::Bytes; use futures::{Stream, StreamExt, TryStreamExt}; @@ -308,8 +309,13 @@ impl Http2Client { let tcp_stream = tokio::time::timeout(self.config.connect_timeout, TcpStream::connect(addr)) .await - .map_err(|_| anyhow::anyhow!("Connection timeout"))? - .map_err(|e| anyhow::anyhow!("Failed to connect: {}", e))?; + .map_err(|_| { + RuntimeError::connection_failed( + addr.to_string(), + "Connection timeout".to_string(), + ) + })? + .map_err(|e| RuntimeError::connection_failed(addr.to_string(), e.to_string()))?; // Build HTTP/2 connection with streaming body type - with or without TLS type StreamingBody = @@ -373,7 +379,9 @@ impl Http2Client { .header(TRACEPARENT_HEADER, trace_ctx.to_traceparent()) .header("content-type", "application/octet-stream") .body(body) - .map_err(|e| anyhow::anyhow!("Failed to build request: {}", e))?; + .map_err(|e| { + RuntimeError::protocol_error(format!("Failed to build request: {}", e)) + })?; let send_future = sender.send_request(request); let response = tokio::time::timeout(self.config.stream_timeout, send_future) @@ -389,7 +397,9 @@ impl Http2Client { let (mut sender, conn): (http2::SendRequest, _) = http2::handshake(TokioExecutor::new(), io) .await - .map_err(|e| anyhow::anyhow!("HTTP/2 handshake failed: {}", e))?; + .map_err(|e| { + RuntimeError::protocol_error(format!("HTTP/2 handshake failed: {}", e)) + })?; // Spawn connection driver let cancel = self.cancel.clone(); @@ -446,14 +456,18 @@ impl Http2Client { .header(TRACEPARENT_HEADER, trace_ctx.to_traceparent()) .header("content-type", "application/octet-stream") .body(body) - .map_err(|e| anyhow::anyhow!("Failed to build request: {}", e))?; + .map_err(|e| RuntimeError::protocol_error(format!("Failed to build request: {}", e)))?; // Send request with timeout let send_future = sender.send_request(request); let response = tokio::time::timeout(self.config.stream_timeout, send_future) .await - .map_err(|_| anyhow::anyhow!("Streaming request timeout"))? - .map_err(|e| anyhow::anyhow!("Streaming request failed: {}", e))?; + .map_err(|_| { + RuntimeError::request_timeout(self.config.stream_timeout.as_millis() as u64) + })? + .map_err(|e| { + RuntimeError::protocol_error(format!("Streaming request failed: {}", e)) + })?; Ok(response) } @@ -588,14 +602,16 @@ impl Http2Client { .header(TRACEPARENT_HEADER, trace_ctx.to_traceparent()) .header("content-type", "application/octet-stream") .body(Full::new(Bytes::from(payload))) - .map_err(|e| anyhow::anyhow!("Failed to build request: {}", e))?; + .map_err(|e| RuntimeError::protocol_error(format!("Failed to build request: {}", e)))?; // Send request with timeout let send_future = conn.sender.send_request(request); let response = tokio::time::timeout(self.config.request_timeout, send_future) .await - .map_err(|_| anyhow::anyhow!("Request timeout"))? - .map_err(|e| anyhow::anyhow!("Request failed: {}", e))?; + .map_err(|_| { + RuntimeError::request_timeout(self.config.request_timeout.as_millis() as u64) + })? + .map_err(|e| RuntimeError::protocol_error(format!("Request failed: {}", e)))?; Ok(response) } diff --git a/crates/pulsing-actor/src/transport/http2/mod.rs b/crates/pulsing-actor/src/transport/http2/mod.rs index 91ec9113c..25292d3d8 100644 --- a/crates/pulsing-actor/src/transport/http2/mod.rs +++ b/crates/pulsing-actor/src/transport/http2/mod.rs @@ -7,6 +7,8 @@ mod retry; mod server; mod stream; +use crate::error::RuntimeError; + #[cfg(feature = "tls")] mod tls; @@ -106,7 +108,7 @@ impl Http2Transport { ) -> anyhow::Result<()> { let path = format!("/actors/{}", actor_name); let Message::Single { msg_type, data } = msg else { - return Err(anyhow::anyhow!("Streaming not supported for tell")); + return Err(RuntimeError::protocol_error("Streaming not supported for tell").into()); }; self.client.tell(addr, &path, &msg_type, data).await @@ -120,7 +122,7 @@ impl Http2Transport { ) -> anyhow::Result<()> { let url_path = format!("/named/{}", path.as_str()); let Message::Single { msg_type, data } = msg else { - return Err(anyhow::anyhow!("Streaming not supported for tell")); + return Err(RuntimeError::protocol_error("Streaming not supported for tell").into()); }; self.client.tell(addr, &url_path, &msg_type, data).await @@ -350,10 +352,11 @@ impl RemoteTransport for Http2RemoteTransport { ) -> anyhow::Result> { // Check circuit breaker before making request if !self.circuit_breaker.can_execute() { - return Err(anyhow::anyhow!( - "Circuit breaker is open for {}", - self.remote_addr - )); + return Err(RuntimeError::ConnectionFailed { + addr: self.remote_addr.to_string(), + reason: "Circuit breaker is open".to_string(), + } + .into()); } let result = self @@ -374,10 +377,11 @@ impl RemoteTransport for Http2RemoteTransport { ) -> anyhow::Result<()> { // Check circuit breaker before making request if !self.circuit_breaker.can_execute() { - return Err(anyhow::anyhow!( - "Circuit breaker is open for {}", - self.remote_addr - )); + return Err(RuntimeError::ConnectionFailed { + addr: self.remote_addr.to_string(), + reason: "Circuit breaker is open".to_string(), + } + .into()); } let result = self @@ -399,10 +403,11 @@ impl RemoteTransport for Http2RemoteTransport { async fn send_message(&self, _actor_id: &ActorId, msg: Message) -> anyhow::Result { // Check circuit breaker before making request if !self.circuit_breaker.can_execute() { - return Err(anyhow::anyhow!( - "Circuit breaker is open for {}", - self.remote_addr - )); + return Err(RuntimeError::ConnectionFailed { + addr: self.remote_addr.to_string(), + reason: "Circuit breaker is open".to_string(), + } + .into()); } // Use unified send_message_full that handles both single and streaming diff --git a/crates/pulsing-py/src/actor.rs b/crates/pulsing-py/src/actor.rs index 461e4af86..40349461e 100644 --- a/crates/pulsing-py/src/actor.rs +++ b/crates/pulsing-py/src/actor.rs @@ -2,9 +2,10 @@ use futures::StreamExt; use pulsing_actor::actor::{ActorId, ActorPath, NodeId}; +use pulsing_actor::error::PulsingError; use pulsing_actor::prelude::*; use pulsing_actor::supervision::{BackoffStrategy, RestartPolicy, SupervisionSpec}; -use pyo3::exceptions::{PyException, PyRuntimeError, PyStopAsyncIteration, PyValueError}; +use pyo3::exceptions::{PyRuntimeError, PyStopAsyncIteration, PyValueError}; use pyo3::prelude::*; use pyo3::types::PyBytes; use std::net::SocketAddr; @@ -13,13 +14,27 @@ use std::sync::Mutex as StdMutex; use tokio::sync::mpsc; use tokio::sync::Mutex as TokioMutex; +use crate::errors::pulsing_error_to_py_err_direct; +use crate::python_error_converter::convert_python_exception_to_actor_error; use crate::python_executor::python_executor; /// Special message type identifier for pickle-encoded Python objects const SEALED_PY_MSG_TYPE: &str = "__sealed_py_message__"; +/// Convert error to Python exception +/// Prefer using pulsing_error_to_py_err_direct for PulsingError types fn to_pyerr(err: E) -> PyErr { - PyException::new_err(format!("{}", err)) + // Try to downcast to PulsingError + let err_str = err.to_string(); + + // For non-PulsingError types, use RuntimeError + // In practice, most errors from pulsing-actor should be PulsingError + PyRuntimeError::new_err(err_str) +} + +/// Convert PulsingError to Python exception +fn pulsing_to_pyerr(err: PulsingError) -> PyErr { + pulsing_error_to_py_err_direct(err) } /// Python wrapper for NodeId @@ -924,7 +939,7 @@ impl Actor for PythonActorWrapper { let is_sealed_msg = msg.msg_type() == SEALED_PY_MSG_TYPE; let py_msg = PyMessage::from_rust_message(msg); - let response = python_executor() + let response: Result = python_executor() .execute(move || { Python::with_gil(|py| -> PyResult { let receive_method = handler.getattr(py, "receive")?; @@ -939,7 +954,18 @@ impl Actor for PythonActorWrapper { py_msg.into_pyobject(py)?.into_any().unbind() }; - let result = receive_method.call1(py, (call_arg,))?; + let result = receive_method.call1(py, (call_arg,)); + + // Handle Python exceptions and convert to ActorError + let result = match result { + Ok(value) => value, + Err(py_err) => { + // Convert Python exception to ActorError + // We need to return this as an error in the Python execution context + // The error will be caught and converted at the Rust level + return Err(py_err); + } + }; let asyncio = py.import("asyncio")?; let is_coro = asyncio @@ -1023,8 +1049,22 @@ impl Actor for PythonActorWrapper { }) }) .await - .map_err(|e| anyhow::anyhow!("Python executor error: {:?}", e))? - .map_err(|e| anyhow::anyhow!("Python handler error: {:?}", e))?; + .map_err(|e| anyhow::anyhow!("Python executor error: {:?}", e))?; + + // Convert Python exceptions to ActorError + let response = match response { + Ok(resp) => resp, + Err(py_err) => { + // Convert Python exception to ActorError + Python::with_gil(|py| { + let actor_err = convert_python_exception_to_actor_error(py, &py_err)?; + // Convert ActorError to PulsingError and then to anyhow::Error + Err(anyhow::Error::from( + pulsing_actor::error::PulsingError::from(actor_err), + )) + }) + }?, + }; match response { PyActorResponse::Single(msg) => Ok(msg.to_message()), @@ -1133,7 +1173,9 @@ impl PyActorSystem { ) -> PyResult> { let config_inner = config.inner; pyo3_async_runtimes::tokio::future_into_py(py, async move { - let system = ActorSystem::new(config_inner).await.map_err(to_pyerr)?; + let system = ActorSystem::new(config_inner) + .await + .map_err(|e| pulsing_to_pyerr(PulsingError::from(e)))?; Ok(PyActorSystem { inner: system, event_loop, diff --git a/crates/pulsing-py/src/errors.rs b/crates/pulsing-py/src/errors.rs new file mode 100644 index 000000000..680bec7fc --- /dev/null +++ b/crates/pulsing-py/src/errors.rs @@ -0,0 +1,62 @@ +//! Python exception bindings for Pulsing errors +//! +//! This module converts Rust error types to Python exceptions. +//! Due to PyO3 abi3 limitations, we use PyRuntimeError as the base +//! and let Python layer re-raise as appropriate exception types. + +use pulsing_actor::error::{PulsingError, RuntimeError}; +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; + +/// Convert Rust PulsingError to appropriate Python exception +/// +/// This function prefixes error messages with error type markers so Python +/// layer can identify and re-raise as appropriate exception types. +pub fn pulsing_error_to_py_err(err: PulsingError) -> PyErr { + let err_msg = err.to_string(); + + match &err { + // Actor errors (user code errors) -> prefix with "ACTOR_ERROR:" + PulsingError::Actor(_actor_err) => { + PyRuntimeError::new_err(format!("ACTOR_ERROR:{}", err_msg)) + } + // Runtime errors (framework errors) -> prefix with "RUNTIME_ERROR:" + PulsingError::Runtime(runtime_err) => { + // Extract actor name if available for runtime errors + let actor_name = match runtime_err { + RuntimeError::ActorNotFound { name } => Some(name.clone()), + RuntimeError::ActorAlreadyExists { name } => Some(name.clone()), + RuntimeError::ActorNotLocal { name } => Some(name.clone()), + RuntimeError::ActorStopped { name } => Some(name.clone()), + RuntimeError::ActorMailboxFull { name } => Some(name.clone()), + RuntimeError::InvalidActorPath { path: _ } => None, + RuntimeError::MessageTypeMismatch { .. } => None, + RuntimeError::ActorSpawnFailed { .. } => None, + _ => None, + }; + + let full_msg = if let Some(ref name) = actor_name { + format!("RUNTIME_ERROR:{}:actor={}", err_msg, name) + } else { + format!("RUNTIME_ERROR:{}", err_msg) + }; + + PyRuntimeError::new_err(full_msg) + } + } +} + +/// Convert PulsingError to Python exception (preferred method) +pub fn pulsing_error_to_py_err_direct(err: PulsingError) -> PyErr { + pulsing_error_to_py_err(err) +} + +/// Add error classes to Python module +/// +/// Note: In abi3 mode, we can't create custom exception classes directly. +/// Exception classes are defined in Python (pulsing/exceptions.py). +/// This function is kept for API consistency. +pub fn add_to_module(_m: &Bound<'_, PyModule>) -> PyResult<()> { + // Error classes are defined in Python layer + Ok(()) +} diff --git a/crates/pulsing-py/src/lib.rs b/crates/pulsing-py/src/lib.rs index c191dd7e6..a9f34e6ea 100644 --- a/crates/pulsing-py/src/lib.rs +++ b/crates/pulsing-py/src/lib.rs @@ -6,7 +6,9 @@ use pyo3::prelude::*; mod actor; +mod errors; mod policies; +mod python_error_converter; mod python_executor; pub use python_executor::{init_python_executor, python_executor, ExecutorError}; @@ -30,6 +32,9 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { .try_init() .ok(); + // Add error classes + errors::add_to_module(m)?; + // Add actor system classes actor::add_to_module(m)?; diff --git a/crates/pulsing-py/src/python_error_converter.rs b/crates/pulsing-py/src/python_error_converter.rs new file mode 100644 index 000000000..abc4eb41c --- /dev/null +++ b/crates/pulsing-py/src/python_error_converter.rs @@ -0,0 +1,132 @@ +//! Convert Python exceptions to Rust ActorError +//! +//! This module provides automatic conversion from Python exceptions +//! to unified ActorError types, enabling seamless error handling +//! across Rust and Python boundaries. + +use pulsing_actor::error::ActorError; +use pyo3::exceptions::{PyTimeoutError, PyTypeError, PyValueError}; +use pyo3::prelude::*; + +/// Convert Python exception (PyErr) to ActorError +/// +/// This function automatically classifies Python exceptions: +/// - ValueError, TypeError -> Business error +/// - TimeoutError -> Timeout error +/// - Other exceptions -> System error +pub fn convert_python_exception_to_actor_error( + py: Python, + err: &PyErr, +) -> anyhow::Result { + // Try to extract exception type and message + let err_type = err.get_type(py); + let type_name = err_type.name()?.to_string(); + let err_msg = err.to_string(); + + // Check for specific exception types + if err.is_instance_of::(py) { + // Timeout error + return Ok(ActorError::timeout("python_operation", 0)); + } + + if err.is_instance_of::(py) || err.is_instance_of::(py) { + // Business error: validation/type errors + return Ok(ActorError::business(400, err_msg, None)); + } + + // Check if it's a custom Pulsing exception + // Try to extract error details from exception attributes + let py_err_obj = err.value(py); + + // Check for PulsingBusinessError + if let Ok(code_attr) = py_err_obj.getattr("code") { + if let Ok(code) = code_attr.extract::() { + let message_attr = py_err_obj.getattr("message").ok(); + let message = message_attr + .and_then(|m| m.extract::().ok()) + .unwrap_or_else(|| err_msg.clone()); + + let details_attr = py_err_obj.getattr("details").ok(); + let details = details_attr.and_then(|d| d.extract::().ok()); + + return Ok(ActorError::business(code, message, details)); + } + } + + // Check for PulsingSystemError + if let Ok(error_attr) = py_err_obj.getattr("error") { + if let Ok(error_msg) = error_attr.extract::() { + let recoverable_attr = py_err_obj.getattr("recoverable").ok(); + let recoverable = recoverable_attr + .and_then(|r| r.extract::().ok()) + .unwrap_or(true); + + return Ok(ActorError::system(error_msg, recoverable)); + } + } + + // Check for PulsingTimeoutError (has both operation and duration_ms) + if let Ok(operation_attr) = py_err_obj.getattr("operation") { + if let Ok(operation) = operation_attr.extract::() { + let duration_attr = py_err_obj.getattr("duration_ms").ok(); + if let Some(duration_ms) = duration_attr.and_then(|d| d.extract::().ok()) { + // Has duration_ms -> Timeout error + return Ok(ActorError::timeout(operation, duration_ms)); + } + } + } + + // Check for PulsingUnsupportedError (by type name or operation attribute without duration_ms) + if type_name.contains("Unsupported") || type_name.contains("unsupported") { + if let Ok(operation_attr) = py_err_obj.getattr("operation") { + if let Ok(operation) = operation_attr.extract::() { + return Ok(ActorError::unsupported(operation)); + } + } + // Fallback: use error message as operation + return Ok(ActorError::unsupported(err_msg)); + } + + // Default: classify based on exception type name + match type_name.as_str() { + "TimeoutError" | "asyncio.TimeoutError" => Ok(ActorError::timeout("python_operation", 0)), + "ValueError" | "TypeError" | "KeyError" | "AttributeError" => { + // Business errors: user input errors + Ok(ActorError::business(400, err_msg, None)) + } + "RuntimeError" | "SystemError" | "OSError" | "IOError" => { + // System errors: internal errors + Ok(ActorError::system(err_msg, true)) + } + _ => { + // Unknown exception type: treat as system error + Ok(ActorError::system( + format!("{}: {}", type_name, err_msg), + true, + )) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_convert_timeout_error() { + Python::with_gil(|py| { + let err = PyTimeoutError::new_err("Operation timed out"); + let actor_err = convert_python_exception_to_actor_error(py, &err).unwrap(); + assert!(matches!(actor_err, ActorError::Timeout { .. })); + }); + } + + #[test] + fn test_convert_value_error() { + Python::with_gil(|py| { + let err = PyValueError::new_err("Invalid value"); + let actor_err = convert_python_exception_to_actor_error(py, &err).unwrap(); + assert!(matches!(actor_err, ActorError::Business { code: 400, .. })); + }); + } +} diff --git a/python/pulsing/__init__.py b/python/pulsing/__init__.py index fcf482e25..f0854a3eb 100644 --- a/python/pulsing/__init__.py +++ b/python/pulsing/__init__.py @@ -83,6 +83,17 @@ def incr(self): self.value += 1; return self.value PYTHON_ACTOR_SERVICE_NAME, ) +# Import exceptions +from pulsing.exceptions import ( + PulsingError, + PulsingRuntimeError, + PulsingActorError, + PulsingBusinessError, + PulsingSystemError, + PulsingTimeoutError, + PulsingUnsupportedError, +) + class ActorSystem: """ActorSystem wrapper with queue API @@ -274,4 +285,13 @@ async def refer(actorid: ActorId | str) -> ActorRef: "ActorProxy", "Message", "StreamMessage", + # Exceptions + "PulsingError", + "PulsingRuntimeError", + "PulsingActorError", + # Business-level exceptions (automatically converted to ActorError) + "PulsingBusinessError", + "PulsingSystemError", + "PulsingTimeoutError", + "PulsingUnsupportedError", ] diff --git a/python/pulsing/actor/__init__.py b/python/pulsing/actor/__init__.py index 15041bd29..7a6893260 100644 --- a/python/pulsing/actor/__init__.py +++ b/python/pulsing/actor/__init__.py @@ -110,7 +110,11 @@ async def shutdown() -> None: def get_system() -> ActorSystem: """Get the global actor system (must call init() first)""" if _global_system is None: - raise RuntimeError("Actor system not initialized. Call 'await init()' first.") + from pulsing.exceptions import PulsingRuntimeError + + raise PulsingRuntimeError( + "Actor system not initialized. Call 'await init()' first." + ) return _global_system @@ -200,6 +204,13 @@ async def tell_with_timeout( resolve, ) +# Import exceptions for convenience +from pulsing.exceptions import ( + PulsingError, + PulsingRuntimeError, + PulsingActorError, +) + # NOTE: `__all__` is the *public, stable surface* for `from pulsing.actor import *`. # We intentionally keep it minimal. Advanced/diagnostic APIs may still be # importable by name, but are not part of the stable top-level contract. @@ -225,6 +236,10 @@ async def tell_with_timeout( # Service (for actor_system function) "PythonActorService", "PYTHON_ACTOR_SERVICE_NAME", + # Exceptions + "PulsingError", + "PulsingRuntimeError", + "PulsingActorError", ] diff --git a/python/pulsing/actor/remote.py b/python/pulsing/actor/remote.py index cd621ca79..dc1515685 100644 --- a/python/pulsing/actor/remote.py +++ b/python/pulsing/actor/remote.py @@ -9,6 +9,77 @@ from typing import Any, TypeVar from pulsing._core import ActorRef, ActorSystem, Message, StreamMessage +from pulsing.exceptions import PulsingActorError, PulsingRuntimeError + + +def _convert_rust_error(err: RuntimeError) -> Exception: + """Convert Rust-raised RuntimeError to appropriate Pulsing exception. + + Rust layer prefixes error messages with markers: + - "ACTOR_ERROR:" -> PulsingActorError (or specific subclasses) + - "RUNTIME_ERROR:" -> PulsingRuntimeError + + The error message format for ActorError: + - "ACTOR_ERROR:Business error [code]: message" -> PulsingBusinessError + - "ACTOR_ERROR:System error: message" -> PulsingSystemError + - "ACTOR_ERROR:Timeout: operation 'op' timed out..." -> PulsingTimeoutError + - "ACTOR_ERROR:Unsupported operation: op" -> PulsingUnsupportedError + """ + from pulsing.exceptions import ( + PulsingBusinessError, + PulsingSystemError, + PulsingTimeoutError, + PulsingUnsupportedError, + ) + + err_msg = str(err) + + if err_msg.startswith("ACTOR_ERROR:"): + msg = err_msg.replace("ACTOR_ERROR:", "") + + # Try to identify specific ActorError type from message + if msg.startswith("Business error ["): + # Extract code, message, and details from "Business error [code]: message" + import re + + match = re.match(r"Business error \[(\d+)\]: (.+)", msg) + if match: + code = int(match.group(1)) + message = match.group(2) + return PulsingBusinessError(code, message) + + if msg.startswith("System error: "): + # Extract error message from "System error: message" + error_msg = msg.replace("System error: ", "") + # Default to recoverable=True (we don't have recoverable flag in message) + return PulsingSystemError(error_msg, recoverable=True) + + if msg.startswith("Timeout: operation '"): + # Extract operation and duration from "Timeout: operation 'op' timed out after Xms" + import re + + match = re.match( + r"Timeout: operation '([^']+)' timed out after (\d+)ms", msg + ) + if match: + operation = match.group(1) + duration_ms = int(match.group(2)) + return PulsingTimeoutError(operation, duration_ms) + + if msg.startswith("Unsupported operation: "): + # Extract operation from "Unsupported operation: op" + operation = msg.replace("Unsupported operation: ", "") + return PulsingUnsupportedError(operation) + + # Fallback: generic PulsingActorError + return PulsingActorError(msg) + elif err_msg.startswith("RUNTIME_ERROR:"): + msg = err_msg.replace("RUNTIME_ERROR:", "") + return PulsingRuntimeError(msg) + else: + # Unknown format, wrap as RuntimeError + return PulsingRuntimeError(err_msg) + logger = logging.getLogger(__name__) @@ -127,14 +198,25 @@ async def _sync_call(self, *args, **kwargs) -> Any: if isinstance(resp, dict): if "__error__" in resp: - raise RuntimeError(resp["__error__"]) + # Actor execution error + try: + raise PulsingActorError( + resp["__error__"], actor_name=str(self._ref.actor_id.id) + ) + except RuntimeError as e: + # If it's a Rust error, convert it + raise _convert_rust_error(e) from e return resp.get("__result__") elif isinstance(resp, Message): if resp.is_stream: return _SyncGeneratorStreamReader(resp) data = resp.to_json() if resp.msg_type == "Error": - raise RuntimeError(data.get("error", "Remote call failed")) + # Actor execution error + raise PulsingActorError( + data.get("error", "Remote call failed"), + actor_name=str(self._ref.actor_id.id), + ) return data.get("result") return resp @@ -182,7 +264,11 @@ async def _get_stream(self): # Not streaming, might be an error data = resp.to_json() if resp.msg_type == "Error": - raise RuntimeError(data.get("error", "Remote call failed")) + # Actor execution error + raise PulsingActorError( + data.get("error", "Remote call failed"), + actor_name=str(self._ref.actor_id.id), + ) # Wrap as single-value iterator self._stream_reader = _SingleValueIterator(data) else: @@ -207,7 +293,10 @@ async def __anext__(self): self._got_result = True raise StopAsyncIteration if "__error__" in item: - raise RuntimeError(item["__error__"]) + # Actor execution error + raise PulsingActorError( + item["__error__"], actor_name=str(self._ref.actor_id.id) + ) if "__yield__" in item: return item["__yield__"] return item @@ -264,7 +353,10 @@ async def __anext__(self): self._got_result = True raise StopAsyncIteration if "__error__" in item: - raise RuntimeError(item["__error__"]) + # Actor execution error + raise PulsingActorError( + item["__error__"], actor_name=str(self._ref.actor_id.id) + ) if "__yield__" in item: return item["__yield__"] return item @@ -606,7 +698,7 @@ def incr(self): self.value += 1; return self.value from . import _global_system if _global_system is None: - raise RuntimeError( + raise PulsingRuntimeError( "Actor system not initialized. Call 'await init()' first." ) @@ -747,7 +839,8 @@ async def remote( data = resp.to_json() if resp.msg_type == "Error": - raise RuntimeError(f"Remote create failed: {data.get('error')}") + # System error: actor creation failed + raise PulsingRuntimeError(f"Remote create failed: {data.get('error')}") # Build remote ActorRef from pulsing._core import ActorId @@ -905,7 +998,8 @@ async def list_actors(self) -> list[dict]: """List all actors on this node.""" data = await self._ask("ListActors") if data.get("type") == "Error": - raise RuntimeError(data.get("message")) + # System error: system message failed + raise PulsingRuntimeError(data.get("message")) return data.get("actors", []) async def get_metrics(self) -> dict: @@ -1025,7 +1119,8 @@ async def create_actor( ) data = resp.to_json() if resp.msg_type == "Error" or data.get("error"): - raise RuntimeError(data.get("error", "Unknown error")) + # System error: actor creation failed + raise PulsingRuntimeError(data.get("error", "Unknown error")) return data diff --git a/python/pulsing/exceptions.py b/python/pulsing/exceptions.py new file mode 100644 index 000000000..545812c3f --- /dev/null +++ b/python/pulsing/exceptions.py @@ -0,0 +1,182 @@ +"""Pulsing exception hierarchy. + +This module provides Python exceptions that correspond to Rust error types. +The exceptions are defined in Python but correspond to Rust error types defined +in crates/pulsing-actor/src/error.rs using thiserror. + +Errors are divided into two categories (matching Rust error structure): + +1. PulsingRuntimeError: Framework/system-level errors + Corresponds to: pulsing_actor::error::RuntimeError + + These are framework-level errors, not caused by user code: + - Actor system errors (NotFound, Stopped, etc.) + - Transport errors (ConnectionFailed, etc.) + - Cluster errors (NodeNotFound, etc.) + - Config errors (InvalidValue, etc.) + - I/O errors, Serialization errors + +2. PulsingActorError: User Actor execution errors + Corresponds to: pulsing_actor::error::ActorError + + These are errors raised by user code during Actor execution: + - Business errors (user input errors) → PulsingBusinessError + - System errors (internal errors from user code) → PulsingSystemError + - Timeout errors (operation timeouts) → PulsingTimeoutError + - Unsupported errors (unsupported operations) → PulsingUnsupportedError + +Note: Due to PyO3 abi3 limitations, we define exceptions in Python and +Rust code raises them using PyRuntimeError with message prefixes. +The Python layer can catch and re-raise as appropriate types. + +For Actor execution errors, use the specific exception types below which +will be automatically converted to Rust ActorError variants. +""" + + +class PulsingError(Exception): + """Base exception for all Pulsing errors. + + This corresponds to pulsing_actor::error::PulsingError in Rust. + """ + + pass + + +class PulsingRuntimeError(PulsingError): + """Framework/system-level errors. + + This corresponds to pulsing_actor::error::RuntimeError in Rust. + + These are framework-level errors, not caused by user code: + - Actor system errors (NotFound, Stopped, etc.) + - Transport errors (ConnectionFailed, etc.) + - Cluster errors (NodeNotFound, etc.) + - Config errors (InvalidValue, etc.) + - I/O errors + - Serialization errors + """ + + def __init__(self, message: str, cause: Exception | None = None): + super().__init__(message) + self.cause = cause + + +class PulsingActorError(PulsingError): + """User Actor execution errors. + + This corresponds to pulsing_actor::error::ActorError in Rust. + + These are errors raised by user code during Actor execution: + - Business errors (user input errors) + - System errors (internal errors from user code) + - Timeout errors (operation timeouts) + - Unsupported errors (unsupported operations) + + Note: Framework-level errors like "Actor not found" are RuntimeError, + not ActorError. + """ + + def __init__( + self, + message: str, + actor_name: str | None = None, + cause: Exception | None = None, + ): + super().__init__(message) + self.actor_name = actor_name + self.cause = cause + + +# ============================================================================ +# Business-level error types (automatically converted to ActorError) +# ============================================================================ + + +class PulsingBusinessError(PulsingActorError): + """Business error: User input error, business logic error. + + These errors are recoverable and should be returned to the caller. + Automatically converted to ActorError::Business in Rust. + + Example: + @remote + class UserActor: + async def validate_age(self, age: int) -> bool: + if age < 18: + raise PulsingBusinessError(400, "Age must be >= 18", + details="User validation failed") + return True + """ + + def __init__(self, code: int, message: str, details: str | None = None): + self.code = code + self.message = message + self.details = details + super().__init__(f"[{code}] {message}", cause=None) + + +class PulsingSystemError(PulsingActorError): + """System error: Internal error, resource error. + + May trigger Actor restart depending on recoverable flag. + Automatically converted to ActorError::System in Rust. + + Example: + @remote + class DataProcessor: + async def process(self, data: str) -> str: + try: + return process_data(data) + except Exception as e: + raise PulsingSystemError(f"Processing failed: {e}", recoverable=True) + """ + + def __init__(self, error: str, recoverable: bool = True): + self.error = error + self.recoverable = recoverable + super().__init__(error, cause=None) + + +class PulsingTimeoutError(PulsingActorError): + """Timeout error: Operation timed out. + + Usually recoverable, can be retried. + Automatically converted to ActorError::Timeout in Rust. + + Example: + @remote + class NetworkActor: + async def fetch(self, url: str) -> str: + try: + return await asyncio.wait_for(httpx.get(url), timeout=5.0) + except asyncio.TimeoutError: + raise PulsingTimeoutError("fetch", duration_ms=5000) + """ + + def __init__(self, operation: str, duration_ms: int = 0): + self.operation = operation + self.duration_ms = duration_ms + super().__init__( + f"Operation '{operation}' timed out after {duration_ms}ms", cause=None + ) + + +class PulsingUnsupportedError(PulsingActorError): + """Unsupported operation error. + + Not recoverable. Indicates that the requested operation is not supported. + Automatically converted to ActorError::Unsupported in Rust. + + Example: + @remote + class LegacyActor: + async def process(self, data: str) -> str: + if data.startswith("legacy:"): + raise PulsingUnsupportedError("process") + return process_data(data) + """ + + def __init__(self, operation: str): + self.operation = operation + super().__init__(f"Unsupported operation: {operation}", cause=None) diff --git a/tests/python/test_agent_runtime_lifecycle.py b/tests/python/test_agent_runtime_lifecycle.py index a58a152f8..0201f5309 100644 --- a/tests/python/test_agent_runtime_lifecycle.py +++ b/tests/python/test_agent_runtime_lifecycle.py @@ -64,7 +64,9 @@ async def test_basic_create_destroy(self): assert result == 10 # After runtime exits, global system should be cleaned up - with pytest.raises(RuntimeError, match="Actor system not initialized"): + from pulsing.exceptions import PulsingRuntimeError + + with pytest.raises(PulsingRuntimeError, match="Actor system not initialized"): get_system() @pytest.mark.asyncio @@ -77,7 +79,9 @@ async def test_repeated_create_destroy(self): assert result == i # Check system is cleaned up after each exit - with pytest.raises(RuntimeError): + from pulsing.exceptions import PulsingRuntimeError + + with pytest.raises(PulsingRuntimeError): get_system() @pytest.mark.asyncio @@ -163,7 +167,9 @@ async def test_multiple_actors_cleanup(self): assert results == list(range(10)) # After runtime exits, system should clean up all actors - with pytest.raises(RuntimeError): + from pulsing.exceptions import PulsingRuntimeError + + with pytest.raises(PulsingRuntimeError): get_system() @pytest.mark.asyncio @@ -197,7 +203,9 @@ async def test_exception_during_runtime(self): pass # Even with exception, system should be cleaned up - with pytest.raises(RuntimeError): + from pulsing.exceptions import PulsingRuntimeError + + with pytest.raises(PulsingRuntimeError): get_system() clear_agent_registry() @@ -341,7 +349,9 @@ async def test_empty_runtime(self): async with runtime(): pass - with pytest.raises(RuntimeError): + from pulsing.exceptions import PulsingRuntimeError + + with pytest.raises(PulsingRuntimeError): get_system() @pytest.mark.asyncio diff --git a/tests/python/test_remote_decorator.py b/tests/python/test_remote_decorator.py index 57083463b..f5be18b43 100644 --- a/tests/python/test_remote_decorator.py +++ b/tests/python/test_remote_decorator.py @@ -77,7 +77,9 @@ def will_fail(self): try: service = await ErrorService.spawn() - with pytest.raises(RuntimeError, match="Intentional error"): + from pulsing.exceptions import PulsingActorError + + with pytest.raises(PulsingActorError, match="Intentional error"): await service.will_fail() finally: @@ -100,7 +102,9 @@ async def will_fail(self): try: service = await AsyncErrorService.spawn() - with pytest.raises(RuntimeError, match="Async error"): + from pulsing.exceptions import PulsingActorError + + with pytest.raises(PulsingActorError, match="Async error"): await service.will_fail() finally: